Skip to content

Commit 6d39afc

Browse files
authored
🐛 Source Facebook Marketing: Convert values' types according to schema types (#4978)
* Convert values' types according to schema types * Put streams back to `configured_catalog.json` Put back `ads_insights` and `ads_insights_age_and_gender` streams. * Pickup changes from #5946 * Implement change request + fix previous PR * Update schema * Remove items_type from convert_to_schema_types() * Bump connectors version
1 parent 7eb5577 commit 6d39afc

File tree

11 files changed

+78
-38
lines changed

11 files changed

+78
-38
lines changed

airbyte-config/init/src/main/resources/config/STANDARD_SOURCE_DEFINITION/e7778cfc-e97c-4458-9ecb-b4f2bba8946c.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"sourceDefinitionId": "e7778cfc-e97c-4458-9ecb-b4f2bba8946c",
33
"name": "Facebook Marketing",
44
"dockerRepository": "airbyte/source-facebook-marketing",
5-
"dockerImageTag": "0.2.16",
5+
"dockerImageTag": "0.2.17",
66
"documentationUrl": "https://docs.airbyte.io/integrations/sources/facebook-marketing",
77
"icon": "facebook.svg"
88
}

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
- sourceDefinitionId: e7778cfc-e97c-4458-9ecb-b4f2bba8946c
138138
name: Facebook Marketing
139139
dockerRepository: airbyte/source-facebook-marketing
140-
dockerImageTag: 0.2.16
140+
dockerImageTag: 0.2.17
141141
documentationUrl: https://docs.airbyte.io/integrations/sources/facebook-marketing
142142
icon: facebook.svg
143143
- sourceDefinitionId: 010eb12f-837b-4685-892d-0a39f76a98f5

airbyte-integrations/connectors/source-facebook-marketing/Dockerfile

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ RUN pip install .
1212
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
1313
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
1414

15-
LABEL io.airbyte.version=0.2.16
15+
LABEL io.airbyte.version=0.2.17
1616
LABEL io.airbyte.name=airbyte/source-facebook-marketing

airbyte-integrations/connectors/source-facebook-marketing/acceptance-test-config.yml

+6-5
Original file line numberDiff line numberDiff line change
@@ -8,18 +8,19 @@ tests:
88
- config_path: "secrets/config.json"
99
status: "succeed"
1010
- config_path: "integration_tests/invalid_config.json"
11-
status: "exception"
11+
status: "failed"
1212
discovery:
1313
- config_path: "secrets/config.json"
1414
basic_read:
1515
- config_path: "secrets/config.json"
1616
configured_catalog_path: "integration_tests/configured_catalog.json"
17-
# FB serializes numeric fields as strings
18-
validate_schema: no
17+
timeout_seconds: 600
1918
incremental:
2019
- config_path: "secrets/config.json"
2120
configured_catalog_path: "integration_tests/configured_catalog_without_insights.json"
22-
future_state_path: "integration_tests/abnormal_state.json"
21+
future_state_path: "integration_tests/future_state.json"
2322
full_refresh:
2423
- config_path: "secrets/config.json"
25-
configured_catalog_path: "integration_tests/configured_catalog.json"
24+
# TODO Change below `configured_catalog_without_insights.json` to `configured_catalog.json` after October 7 2021
25+
# because all running campaigns should be finished by that time.
26+
configured_catalog_path: "integration_tests/configured_catalog_without_insights.json"
Original file line numberDiff line numberDiff line change
@@ -42,9 +42,5 @@
4242
"ads_insights_action_types": {
4343
"date_start": "2121-07-25T13:34:26Z",
4444
"include_deleted": true
45-
},
46-
"ads_insights_action_types": {
47-
"date_start": "2021-07-25T13:34:26Z",
48-
"include_deleted": true
4945
}
5046
}

airbyte-integrations/connectors/source-facebook-marketing/integration_tests/invalid_config.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
"start_date": "2021-04-01T00:00:00Z",
33
"account_id": "account",
44
"access_token": "wrong_token",
5-
"include_deleted": "true"
5+
"include_deleted": true
66
}

airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/ad_sets.json

+3-18
Original file line numberDiff line numberDiff line change
@@ -48,20 +48,10 @@
4848
"format": "date-time"
4949
},
5050
"daily_budget": {
51-
"type": ["null", "number"],
52-
"maximum": 100000000000000000000000000000000,
53-
"minimum": -100000000000000000000000000000000,
54-
"multipleOf": 0.000001,
55-
"exclusiveMaximum": true,
56-
"exclusiveMinimum": true
51+
"type": ["null", "number"]
5752
},
5853
"budget_remaining": {
59-
"type": ["null", "number"],
60-
"maximum": 100000000000000000000000000000000,
61-
"minimum": -100000000000000000000000000000000,
62-
"multipleOf": 0.000001,
63-
"exclusiveMaximum": true,
64-
"exclusiveMinimum": true
54+
"type": ["null", "number"]
6555
},
6656
"effective_status": {
6757
"type": ["null", "string"]
@@ -78,12 +68,7 @@
7868
"format": "date-time"
7969
},
8070
"lifetime_budget": {
81-
"type": ["null", "number"],
82-
"maximum": 100000000000000000000000000000000,
83-
"minimum": -100000000000000000000000000000000,
84-
"multipleOf": 0.000001,
85-
"exclusiveMaximum": true,
86-
"exclusiveMinimum": true
71+
"type": ["null", "number"]
8772
},
8873
"targeting": { "$ref": "targeting.json" },
8974
"bid_info": {

airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/schemas/shared/targeting.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
"$ref": "targeting.json#/definitions/id_name_pairs"
2222
},
2323
"home_type": {
24-
"$ref$": "targeting.json#/definitions/id_name_pairs"
24+
"$ref": "targeting.json#/definitions/id_name_pairs"
2525
},
2626
"friends_of_connections": {
2727
"$ref": "targeting.json#/definitions/id_name_pairs"

airbyte-integrations/connectors/source-facebook-marketing/source_facebook_marketing/streams.py

+62-6
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
from abc import ABC
2828
from collections import deque
2929
from datetime import datetime
30-
from typing import Any, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Sequence
30+
from typing import Any, Iterable, Iterator, List, Mapping, MutableMapping, Optional, Sequence, Union
3131

3232
import backoff
3333
import pendulum
@@ -46,7 +46,7 @@
4646
backoff_policy = retry_pattern(backoff.expo, FacebookRequestError, max_tries=5, factor=5)
4747

4848

49-
def remove_params_from_url(url: str, params: [str]) -> str:
49+
def remove_params_from_url(url: str, params: List[str]) -> str:
5050
"""
5151
Parses a URL and removes the query parameters specified in params
5252
:param url: URL
@@ -110,7 +110,63 @@ def read_records(
110110
) -> Iterable[Mapping[str, Any]]:
111111
"""Main read method used by CDK"""
112112
for record in self._read_records(params=self.request_params(stream_state=stream_state)):
113-
yield self._extend_record(record, fields=self.fields)
113+
yield self.transform(self._extend_record(record, fields=self.fields))
114+
115+
def transform(self, record: Mapping[str, Any]) -> Mapping[str, Any]:
    """
    Cast the values of `record` to the types declared in the schema returned by `get_json_schema()`.

    The conversion is delegated to `convert_to_schema_types()`, which updates the record in place;
    the same record object is returned so the call can be used inline in a `yield`.
    """
    properties = self.get_json_schema()["properties"]
    self.convert_to_schema_types(record, properties)
    return record
122+
123+
def get_python_type(self, _types: Union[list, str]) -> tuple:
    """Converts types from schema to python types. Examples:
    - `["string", "null"]` will be converted to `(str,)`
    - `["array", "string", "null"]` will be converted to `(list, str,)`
    - `"boolean"` will be converted to `(bool,)`
    - `"null"` and `["null"]` will be converted to `()`

    `"null"` is dropped because it only marks a field as nullable; there is no python type to convert to.

    :param _types: a single schema type name or a list of schema type names
    :return: tuple of python types, in the same order as they are listed in the schema
    :raises KeyError: if a type name is neither `"null"` nor one of the names in the mapping below
    """
    types_mapping = {
        "string": str,
        "number": float,
        "integer": int,
        "object": dict,
        "array": list,
        "boolean": bool,
    }

    # Treat a bare name like a one-element list so "null" is handled the same way in both forms.
    type_names = _types if isinstance(_types, list) else [_types]
    return tuple(types_mapping[name] for name in type_names if name != "null")

def convert_to_schema_types(self, record: MutableMapping[str, Any], schema: Mapping[str, Any]):
    """
    Converts values' type from record to appropriate type from schema. For example, let's say we have `reach` value
    and in schema it has `number` type because it's, well, a number, but from API we are getting `reach` as string.
    This function fixes this and converts `reach` value from `string` to `number`. Same for all fields and all
    types from schema.

    The record is modified in place:
    - nested objects are converted recursively against their `properties`;
    - lists are converted element by element against `items`, so the list itself is kept;
    - `None` values and fields without a usable `type` in the schema are left untouched;
    - when several types are allowed, a mismatching value is cast to the first non-null one.

    :param record: record (or nested object of a record) to convert
    :param schema: mapping of field name to that field's schema, i.e. the `properties` of an object schema
    :raises ValueError, TypeError: if a value can not be cast to the type required by the schema
    """
    if not schema:
        return

    # Only values of already existing keys are reassigned, so iterating the dict directly is safe.
    for key, value in record.items():
        # `None` must stay `None`: casting it would produce "None" or make float()/int() raise.
        if key not in schema or value is None:
            continue

        field_schema = schema[key]
        if isinstance(value, dict):
            self.convert_to_schema_types(record=value, schema=field_schema.get("properties", {}))
        elif isinstance(value, list) and "items" in field_schema:
            items_schema = field_schema["items"]
            item_types = self.get_python_type(items_schema["type"]) if "type" in items_schema else ()
            if not item_types or list in item_types:
                # TODO Currently we don't have support for list of lists.
                continue
            if dict in item_types:
                item_properties = items_schema.get("properties", {})
                for item in value:
                    # Items that are not objects can't be matched against `properties`, leave them as is.
                    if isinstance(item, dict):
                        self.convert_to_schema_types(record=item, schema=item_properties)
                continue
            # Replace each mismatching element inside the list rather than overwriting the whole field.
            for index, item in enumerate(value):
                if item is not None and not isinstance(item, item_types):
                    value[index] = item_types[0](item)
        elif "type" in field_schema:
            python_types = self.get_python_type(field_schema["type"])
            if python_types and not isinstance(value, python_types):
                record[key] = python_types[0](value)
114170

115171
def _read_records(self, params: Mapping[str, Any]) -> Iterable:
116172
"""Wrapper around query to backoff errors.
@@ -298,7 +354,7 @@ class AdsInsights(FBMarketingIncrementalStream):
298354
MAX_WAIT_TO_START = pendulum.duration(minutes=5)
299355
MAX_WAIT_TO_FINISH = pendulum.duration(minutes=30)
300356
MAX_ASYNC_SLEEP = pendulum.duration(minutes=5)
301-
MAX_ASYNC_JOBS = 3
357+
MAX_ASYNC_JOBS = 10
302358
INSIGHTS_RETENTION_PERIOD = pendulum.duration(days=37 * 30)
303359

304360
action_breakdowns = ALL_ACTION_BREAKDOWNS
@@ -325,7 +381,7 @@ def read_records(
325381
# because we query `lookback_window` days before actual cursor we might get records older then cursor
326382

327383
for obj in result.get_result():
328-
yield obj.export_all_data()
384+
yield self.transform(obj.export_all_data())
329385

330386
def stream_slices(self, stream_state: Mapping[str, Any] = None, **kwargs) -> Iterable[Optional[Mapping[str, Any]]]:
331387
"""Slice by date periods and schedule async job for each period, run at most MAX_ASYNC_JOBS jobs at the same time.
@@ -356,7 +412,7 @@ def wait_for_job(self, job) -> AdReportRun:
356412
job = job.api_get()
357413
job_progress_pct = job["async_percent_completion"]
358414
job_id = job["report_run_id"]
359-
self.logger.info(f"ReportRunId {job_id} is {job_progress_pct}% complete")
415+
self.logger.info(f"ReportRunId {job_id} is {job_progress_pct}% complete ({job['async_status']})")
360416
runtime = pendulum.now() - start_time
361417

362418
if job["async_status"] == "Job Completed":

airbyte-integrations/connectors/source-facebook-marketing/unit_tests/test_streams.py

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
2222
# SOFTWARE.
2323
#
24+
2425
from source_facebook_marketing.streams import remove_params_from_url
2526

2627

docs/integrations/sources/facebook-marketing.md

+1
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ With the Ad Account ID and API access token, you should be ready to start pullin
101101

102102
| Version | Date | Pull Request | Subject |
103103
| :------ | :-------- | :----- | :------ |
104+
| 0.2.17 | 2021-09-14 | [4978](https://github.com/airbytehq/airbyte/pull/4978) | Convert values' types according to schema types |
104105
| 0.2.16 | 2021-09-14 | [6060](https://github.com/airbytehq/airbyte/pull/6060) | Fix schema for `ads_insights` stream |
105106
| 0.2.15 | 2021-09-14 | [5958](https://github.com/airbytehq/airbyte/pull/5958) | Fix url parsing and add report that exposes conversions |
106107
| 0.2.14 | 2021-07-19 | [4820](https://github.com/airbytehq/airbyte/pull/4820) | Improve the rate limit management |

0 commit comments

Comments
(0)