Skip to content

Commit d19754c

Browse files
authored
🐛 Source Github: Fix error handling for 404 streams (#10878)
* Fix, no error if "Projects" feature is disabled * improve requests.codes.NOT_FOUND Signed-off-by: Sergey Chvalyuk <[email protected]>
1 parent af1a503 commit d19754c

File tree

7 files changed

+82
-26
lines changed

7 files changed

+82
-26
lines changed

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -245,7 +245,7 @@
245245
- name: GitHub
246246
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
247247
dockerRepository: airbyte/source-github
248-
dockerImageTag: 0.2.21
248+
dockerImageTag: 0.2.22
249249
documentationUrl: https://docs.airbyte.io/integrations/sources/github
250250
icon: github.svg
251251
sourceType: api

airbyte-config/init/src/main/resources/seed/source_specs.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2357,7 +2357,7 @@
23572357
supportsNormalization: false
23582358
supportsDBT: false
23592359
supported_destination_sync_modes: []
2360-
- dockerImage: "airbyte/source-github:0.2.21"
2360+
- dockerImage: "airbyte/source-github:0.2.22"
23612361
spec:
23622362
documentationUrl: "https://docs.airbyte.io/integrations/sources/github"
23632363
connectionSpecification:

airbyte-integrations/connectors/source-github/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,5 @@ RUN pip install .
1212
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
1313
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]
1414

15-
LABEL io.airbyte.version=0.2.21
15+
LABEL io.airbyte.version=0.2.22
1616
LABEL io.airbyte.name=airbyte/source-github

airbyte-integrations/connectors/source-github/source_github/streams.py

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,13 @@ def read_records(self, stream_slice: Mapping[str, any] = None, **kwargs) -> Iter
100100
# This whole try/except situation in `read_records()` isn't good but right now in `self._send_request()`
101101
# function we have `response.raise_for_status()` so we don't have much choice on how to handle errors.
102102
# Blocked on https://github.com/airbytehq/airbyte/issues/3514.
103-
if e.response.status_code == requests.codes.FORBIDDEN:
103+
if e.response.status_code == requests.codes.NOT_FOUND:
104+
# A lot of streams are not available for repositories owned by a user instead of an organization.
105+
if isinstance(self, Organizations):
106+
error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for organization `{stream_slice['organization']}`."
107+
else:
108+
error_msg = f"Syncing `{self.__class__.__name__}` stream isn't available for repository `{stream_slice['repository']}`."
109+
elif e.response.status_code == requests.codes.FORBIDDEN:
104110
# When using the `check_connection` method, we should raise an error if we do not have access to the repository.
105111
if isinstance(self, Repositories):
106112
raise e
@@ -116,30 +122,10 @@ def read_records(self, stream_slice: Mapping[str, any] = None, **kwargs) -> Iter
116122
error_msg = (
117123
f"Syncing `{self.name}` stream isn't available for repository `{repository}`. Full error message: {error_msg}"
118124
)
119-
elif e.response.status_code == requests.codes.NOT_FOUND and "/teams?" in error_msg:
120-
# For private repositories `Teams` stream is not available and we get "404 Client Error: Not Found for
121-
# url: https://api.github.com/orgs/<org_name>/teams?per_page=100" error.
122-
error_msg = f"Syncing `Team` stream isn't available for organization `{stream_slice['organization']}`."
123-
elif e.response.status_code == requests.codes.NOT_FOUND and "/repos?" in error_msg:
124-
# `Repositories` stream is not available for repositories not in an organization.
125-
# Handle "404 Client Error: Not Found for url: https://api.github.com/orgs/<org_name>/repos?per_page=100" error.
126-
error_msg = f"Syncing `Repositories` stream isn't available for organization `{stream_slice['organization']}`."
127-
elif e.response.status_code == requests.codes.GONE and "/projects?" in error_msg:
125+
elif e.response.status_code == requests.codes.GONE and isinstance(self, Projects):
128126
# Some repos don't have projects enabled and we get "410 Client Error: Gone for
129127
# url: https://api.github.com/repos/xyz/projects?per_page=100" error.
130128
error_msg = f"Syncing `Projects` stream isn't available for repository `{stream_slice['repository']}`."
131-
elif e.response.status_code == requests.codes.NOT_FOUND and "/orgs/" in error_msg:
132-
# Some streams are not available for repositories owned by a user instead of an organization.
133-
# Handle "404 Client Error: Not Found" errors
134-
if isinstance(self, Repositories):
135-
error_msg = f"Syncing `Repositories` stream isn't available for organization `{stream_slice['organization']}`."
136-
elif isinstance(self, Users):
137-
error_msg = f"Syncing `Users` stream isn't available for organization `{stream_slice['organization']}`."
138-
elif isinstance(self, Organizations):
139-
error_msg = f"Syncing `Organizations` stream isn't available for organization `{stream_slice['organization']}`."
140-
else:
141-
self.logger.error(f"Undefined error while reading records: {error_msg}")
142-
raise e
143129
elif e.response.status_code == requests.codes.CONFLICT:
144130
error_msg = (
145131
f"Syncing `{self.name}` stream isn't available for repository "

airbyte-integrations/connectors/source-github/unit_tests/test_stream.py

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66
from unittest.mock import MagicMock, patch
77

88
import pytest
9+
import requests
910
import responses
1011
from airbyte_cdk.sources.streams.http.exceptions import BaseBackoffException
11-
from source_github.streams import PullRequestCommentReactions
12+
from source_github.streams import Projects, PullRequestCommentReactions, Repositories, Teams
13+
14+
from .utils import read_full_refresh
1215

1316
DEFAULT_BACKOFF_DELAYS = [5, 10, 20, 40, 80]
1417

@@ -47,3 +50,54 @@ def test_backoff_time(http_status, response_text, expected_backoff_time):
4750
args = {"authenticator": None, "repositories": ["test_repo"], "start_date": "start_date", "page_size_for_large_streams": 30}
4851
stream = PullRequestCommentReactions(**args)
4952
assert stream.backoff_time(response_mock) == expected_backoff_time
53+
54+
55+
@responses.activate
56+
def test_stream_teams_404():
57+
kwargs = {"organizations": ["org_name"]}
58+
stream = Teams(**kwargs)
59+
60+
responses.add(
61+
"GET",
62+
"https://api.github.com/orgs/org_name/teams",
63+
status=requests.codes.NOT_FOUND,
64+
json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/teams#list-teams"},
65+
)
66+
67+
assert read_full_refresh(stream) == []
68+
assert len(responses.calls) == 1
69+
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/teams?per_page=100"
70+
71+
72+
@responses.activate
73+
def test_stream_repositories_404():
74+
kwargs = {"organizations": ["org_name"]}
75+
stream = Repositories(**kwargs)
76+
77+
responses.add(
78+
"GET",
79+
"https://api.github.com/orgs/org_name/repos",
80+
status=requests.codes.NOT_FOUND,
81+
json={"message": "Not Found", "documentation_url": "https://docs.github.com/rest/reference/repos#list-organization-repositories"},
82+
)
83+
84+
assert read_full_refresh(stream) == []
85+
assert len(responses.calls) == 1
86+
assert responses.calls[0].request.url == "https://api.github.com/orgs/org_name/repos?per_page=100"
87+
88+
89+
@responses.activate
90+
def test_stream_projects_disabled():
91+
kwargs = {"start_date": "start_date", "page_size_for_large_streams": 30, "repositories": ["test_repo"]}
92+
stream = Projects(**kwargs)
93+
94+
responses.add(
95+
"GET",
96+
"https://api.github.com/repos/test_repo/projects",
97+
status=requests.codes.GONE,
98+
json={"message": "Projects are disabled for this repository", "documentation_url": "https://docs.github.com/v3/projects"},
99+
)
100+
101+
assert read_full_refresh(stream) == []
102+
assert len(responses.calls) == 1
103+
assert responses.calls[0].request.url == "https://api.github.com/repos/test_repo/projects?per_page=100&state=all"
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#
2+
# Copyright (c) 2021 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
6+
from airbyte_cdk.models import SyncMode
7+
from airbyte_cdk.sources.streams import Stream
8+
9+
10+
def read_full_refresh(stream_instance: Stream):
11+
records = []
12+
slices = stream_instance.stream_slices(sync_mode=SyncMode.full_refresh)
13+
for slice in slices:
14+
records.extend(list(stream_instance.read_records(stream_slice=slice, sync_mode=SyncMode.full_refresh)))
15+
return records

docs/integrations/sources/github.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ Your token should have at least the `repo` scope. Depending on which streams you
9595

9696
| Version | Date | Pull Request | Subject |
9797
|:--------|:-----------| :--- |:-------------------------------------------------------------------------------------------------------------|
98+
| 0.2.22 | 2022-03-10 | [10878](https://github.com/airbytehq/airbyte/pull/10878) | Fix error handling for unavailable streams with 404 status code |
9899
| 0.2.21 | 2022-03-04 | [10749](https://github.com/airbytehq/airbyte/pull/10749) | Add new stream `ProjectCards` |
99100
| 0.2.20 | 2022-02-16 | [10385](https://github.com/airbytehq/airbyte/pull/10385) | Add new stream `Deployments`, `ProjectColumns`, `PullRequestCommits` |
100101
| 0.2.19 | 2022-02-07 | [10211](https://github.com/airbytehq/airbyte/pull/10211) | Add human-readable error in case of incorrect organization or repo name |

0 commit comments

Comments
 (0)