Skip to content

Commit e733052

Browse files
bleonardjatinyadav-cc
authored andcommitted
Handle seeing uncompressed sendgrid contact data (airbytehq#35343)
1 parent 1d51b6d commit e733052

File tree

6 files changed

+39
-11
lines changed

6 files changed

+39
-11
lines changed

airbyte-integrations/connectors/source-sendgrid/integration_tests/expected_records.jsonl

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -184,12 +184,12 @@
184184
{"stream": "suppression_group_members", "data": {"email": "test-forsuppressiongroup [email protected]", "group_id": 14772, "group_name": "Test Suggestions Group 12", "created_at": 1612363238}, "emitted_at": 1631093393000}
185185
{"stream": "suppression_group_members", "data": {"email": "test-forsuppressiongroup [email protected]", "group_id": 14772, "group_name": "Test Suggestions Group 12", "created_at": 1612363238}, "emitted_at": 1631093393000}
186186
{"stream": "suppression_group_members", "data": {"email": "[email protected]", "group_id": 14780, "group_name": "Test Suggestions Group 20", "created_at": 1631093329}, "emitted_at": 1631093393000}
187-
{"stream": "bounces", "data": { "created": 1621442821, "email": "vadym.hevlich@zazmicinvalid", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680684}
188-
{"stream": "bounces", "data": { "created": 1621441107, "email": "vadym.hevlich@zazmiccom2", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680684}
189-
{"stream": "bounces", "data": { "created": 1621442883, "email": "vadym.hevlich@zazmic_com", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680684}
190-
{"stream": "bounces", "data": { "created": 1621441104, "email": "vadym.hevlich@zazmiccom1", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680684}
191-
{"stream": "bounces", "data": { "created": 1621442811, "email": "vadym.hevlich@zazmicio", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680685}
192-
{"stream": "bounces", "data": { "created": 1621430037, "email": "vadym.hevlich@zazmiccom", "reason": "Invalid Domain", "status": "" }, "emitted_at": 1678792680685}
187+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmic_com", "created": 1621439283}, "emitted_at": 1708535996116}
188+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmicinvalid", "created": 1621439221}, "emitted_at": 1708535996116}
189+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmicio", "created": 1621439211}, "emitted_at": 1708535996116}
190+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmiccom2", "created": 1621437507}, "emitted_at": 1708535996117}
191+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmiccom1", "created": 1621437504}, "emitted_at": 1708535996117}
192+
{"stream": "bounces", "data": {"status": "", "reason": "Invalid Domain", "email": "vadym.hevlich@zazmiccom", "created": 1621426437}, "emitted_at": 1708535996117}
193193
{"stream": "campaigns", "data": {"created_at": "2021-09-08T09:07:48Z", "id": "3c5a9fa6-1084-11ec-ac32-4228d699bad5", "name": "Untitled Single Send", "status": "triggered", "updated_at": "2021-09-08T09:11:08Z", "is_abtest": false, "channels": ["email"]}, "emitted_at": 1678791750589}
194194
{"stream": "campaigns", "data": {"created_at": "2021-09-08T09:04:36Z", "id": "c9f286fb-1083-11ec-ae03-ca0fc7f28419", "name": "Copy of Untitled Single Send", "status": "triggered", "updated_at": "2021-09-08T09:09:08Z", "is_abtest": false, "channels": ["email"]}, "emitted_at": 1678791750589}
195195
{"stream": "campaigns", "data": {"created_at": "2021-09-08T08:53:59Z", "id": "4e5be6a3-1082-11ec-8512-9afd40c324e6", "name": "Untitled Single Send", "status": "triggered", "updated_at": "2021-09-08T08:57:08Z", "is_abtest": false, "channels": ["email"]}, "emitted_at": 1678791750590}

airbyte-integrations/connectors/source-sendgrid/metadata.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ data:
1010
connectorSubtype: api
1111
connectorType: source
1212
definitionId: fbb5fbe2-16ad-4cf4-af7d-ff9d9c316c87
13-
dockerImageTag: 0.4.2
13+
dockerImageTag: 0.4.3
1414
dockerRepository: airbyte/source-sendgrid
1515
documentationUrl: https://docs.airbyte.com/integrations/sources/sendgrid
1616
githubIssueLabel: source-sendgrid

airbyte-integrations/connectors/source-sendgrid/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "0.4.2"
6+
version = "0.4.3"
77
name = "source-sendgrid"
88
description = "Source implementation for Sendgrid."
99
authors = [ "Airbyte <[email protected]>",]

airbyte-integrations/connectors/source-sendgrid/source_sendgrid/streams.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,13 @@ def download_data(self, url: str, chunk_size: int = 1024) -> tuple[str, str]:
299299
tmp_file, "wb"
300300
) as data_file:
301301
for chunk in response.iter_content(chunk_size=chunk_size):
302-
data_file.write(decompressor.decompress(chunk))
302+
try:
303+
# see if it's compressed. we are seeing some that are not all of a sudden.
304+
# but let's also guard against the case where sendgrid changes it back.
305+
data_file.write(decompressor.decompress(chunk))
306+
except zlib.error as e:
307+
# it's not actually compressed!
308+
data_file.write(chunk)
303309
# check the file exists
304310
if os.path.isfile(tmp_file):
305311
return tmp_file, self.encoding

airbyte-integrations/connectors/source-sendgrid/unit_tests/unit_test.py

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ def test_should_retry_on_permission_error(requests_mock, stream_class, status, e
153153

154154
def test_compressed_contact_response(requests_mock):
155155
stream = Contacts()
156-
with open(os.path.dirname(__file__) + "/compressed_response", "rb") as compressed_response:
156+
with open(os.path.dirname(__file__) + "/compressed_response", "rb") as file_response:
157157
url = "https://api.sendgrid.com/v3/marketing/contacts/exports"
158158
requests_mock.register_uri("POST", url, [{"json": {"id": "random_id"}, "status_code": 202}])
159159
url = "https://api.sendgrid.com/v3/marketing/contacts/exports/random_id"
@@ -162,7 +162,28 @@ def test_compressed_contact_response(requests_mock):
162162
{"json": {"status": "ready", "urls": ["https://sample_url/sample_csv.csv.gzip"]}, "status_code": 202},
163163
]
164164
requests_mock.register_uri("GET", url, resp_bodies)
165-
requests_mock.register_uri("GET", "https://sample_url/sample_csv.csv.gzip", [{"body": compressed_response, "status_code": 202}])
165+
requests_mock.register_uri("GET", "https://sample_url/sample_csv.csv.gzip", [{"body": file_response, "status_code": 202}])
166+
recs = list(stream.read_records(sync_mode=SyncMode.full_refresh))
167+
decompressed_response = pd.read_csv(os.path.dirname(__file__) + "/decompressed_response.csv", dtype=str)
168+
expected_records = [
169+
{k.lower(): v for k, v in x.items()} for x in decompressed_response.replace({nan: None}).to_dict(orient="records")
170+
]
171+
172+
assert recs == expected_records
173+
174+
175+
def test_uncompressed_contact_response(requests_mock):
176+
stream = Contacts()
177+
with open(os.path.dirname(__file__) + "/decompressed_response.csv", "rb") as file_response:
178+
url = "https://api.sendgrid.com/v3/marketing/contacts/exports"
179+
requests_mock.register_uri("POST", url, [{"json": {"id": "random_id"}, "status_code": 202}])
180+
url = "https://api.sendgrid.com/v3/marketing/contacts/exports/random_id"
181+
resp_bodies = [
182+
{"json": {"status": "pending", "id": "random_id", "urls": []}, "status_code": 202},
183+
{"json": {"status": "ready", "urls": ["https://sample_url/sample_csv.csv.gzip"]}, "status_code": 202},
184+
]
185+
requests_mock.register_uri("GET", url, resp_bodies)
186+
requests_mock.register_uri("GET", "https://sample_url/sample_csv.csv.gzip", [{"body": file_response, "status_code": 202}])
166187
recs = list(stream.read_records(sync_mode=SyncMode.full_refresh))
167188
decompressed_response = pd.read_csv(os.path.dirname(__file__) + "/decompressed_response.csv", dtype=str)
168189
expected_records = [

docs/integrations/sources/sendgrid.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ The connector is restricted by normal Sendgrid [requests limitation](https://doc
8484

8585
| Version | Date | Pull Request | Subject |
8686
|:--------|:-----------|:---------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
87+
| 0.4.3 | 2024-02-21 | [35181](https://github.com/airbytehq/airbyte/pull/35343) | Handle uncompressed contacts downloads. |
8788
| 0.4.2 | 2024-02-12 | [35181](https://github.com/airbytehq/airbyte/pull/35181) | Manage dependencies with Poetry. |
8889
| 0.4.1 | 2023-10-18 | [31543](https://github.com/airbytehq/airbyte/pull/31543) | Base image migration: remove Dockerfile and use the python-connector-base image |
8990
| 0.4.0 | 2023-05-19 | [23959](https://github.com/airbytehq/airbyte/pull/23959) | Add `unsubscribe_groups`stream

0 commit comments

Comments
 (0)