Skip to content

Commit 2aaf33e

Browse files
authored
✨ [source-github] Bump cdk version and enable RFR for all non-incremental streams (#42966)
1 parent 871d5b3 commit 2aaf33e

File tree

9 files changed

+332
-86
lines changed

9 files changed

+332
-86
lines changed

airbyte-integrations/connectors/source-github/integration_tests/expected_records.jsonl

+2-2
Large diffs are not rendered by default.

airbyte-integrations/connectors/source-github/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ data:
1010
connectorSubtype: api
1111
connectorType: source
1212
definitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
13-
dockerImageTag: 1.8.1
13+
dockerImageTag: 1.8.2
1414
dockerRepository: airbyte/source-github
1515
documentationUrl: https://docs.airbyte.com/integrations/sources/github
1616
githubIssueLabel: source-github

airbyte-integrations/connectors/source-github/poetry.lock

+26-26
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

airbyte-integrations/connectors/source-github/pyproject.toml

+3-3
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
33
build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
6-
version = "1.8.1"
6+
version = "1.8.2"
77
name = "source-github"
88
description = "Source implementation for GitHub."
99
authors = [ "Airbyte <contact@airbyte.io>",]
@@ -16,8 +16,8 @@ repository = "https://github.com/airbytehq/airbyte"
1616
include = "source_github"
1717

1818
[tool.poetry.dependencies]
19-
python = "^3.9,<3.12"
20-
airbyte-cdk = "^3"
19+
python = "^3.10,<3.12"
20+
airbyte-cdk = "^4"
2121
sgqlc = "==16.3"
2222

2323
[tool.poetry.scripts]

airbyte-integrations/connectors/source-github/source_github/errors_handlers.py

+9
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,15 @@ def interpret_response(self, response_or_exception: Optional[Union[requests.Resp
9090

9191

9292
class ContributorActivityErrorHandler(HttpStatusErrorHandler):
93+
"""
94+
This custom error handler is needed for streams based on repository statistics endpoints like ContributorActivity because
95+
when requesting data that hasn't been cached yet when the request is made, you'll receive a 202 response. And these requests
96+
need to be retried to get the actual results.
97+
98+
See the docs for more info:
99+
https://docs.github.com/en/rest/metrics/statistics?apiVersion=2022-11-28#a-word-about-caching
100+
"""
101+
93102
def interpret_response(self, response_or_exception: Optional[Union[requests.Response, Exception]] = None) -> ErrorResolution:
94103
if isinstance(response_or_exception, requests.Response) and response_or_exception.status_code == requests.codes.ACCEPTED:
95104
return ErrorResolution(

airbyte-integrations/connectors/source-github/source_github/streams.py

+14-2
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,11 @@
99

1010
import pendulum
1111
import requests
12-
from airbyte_cdk import BackoffStrategy
12+
from airbyte_cdk import BackoffStrategy, StreamSlice
1313
from airbyte_cdk.models import AirbyteLogMessage, AirbyteMessage, Level, SyncMode
1414
from airbyte_cdk.models import Type as MessageType
1515
from airbyte_cdk.sources.streams.availability_strategy import AvailabilityStrategy
16+
from airbyte_cdk.sources.streams.checkpoint.substream_resumable_full_refresh_cursor import SubstreamResumableFullRefreshCursor
1617
from airbyte_cdk.sources.streams.core import CheckpointMixin, Stream
1718
from airbyte_cdk.sources.streams.http import HttpStream
1819
from airbyte_cdk.sources.streams.http.error_handlers import ErrorHandler, ErrorResolution, HttpStatusErrorHandler, ResponseAction
@@ -57,6 +58,9 @@ def __init__(self, api_url: str = "https://api.github.com", access_token_type: s
5758
self.api_url = api_url
5859
self.state = {}
5960

61+
if not self.supports_incremental:
62+
self.cursor = SubstreamResumableFullRefreshCursor()
63+
6064
@property
6165
def url_base(self) -> str:
6266
return self.api_url
@@ -1613,7 +1617,8 @@ def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str,
16131617
return record
16141618

16151619
def get_error_handler(self) -> Optional[ErrorHandler]:
1616-
return ContributorActivityErrorHandler(logger=self.logger, max_retries=self.max_retries, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING)
1620+
1621+
return ContributorActivityErrorHandler(logger=self.logger, max_retries=5, error_mapping=GITHUB_DEFAULT_ERROR_MAPPING)
16171622

16181623
def get_backoff_strategy(self) -> Optional[Union[BackoffStrategy, List[BackoffStrategy]]]:
16191624
return ContributorActivityBackoffStrategy()
@@ -1645,6 +1650,13 @@ def read_records(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> Iter
16451650
message=f"Syncing `{self.__class__.__name__}` " f"stream isn't available for repository `{repository}`.",
16461651
),
16471652
)
1653+
1654+
# In order to retain the existing stream behavior before we added RFR to this stream, we need to close out the
1655+
# partition after we exhaust the maximum number of retries on the 202 response. This does lead to the question
1656+
# of whether we should prematurely exit in the first place, but for now we're going to aim for feature parity
1657+
partition_obj = stream_slice.get("partition")
1658+
if self.cursor and partition_obj:
1659+
self.cursor.close_slice(StreamSlice(cursor_slice={}, partition=partition_obj))
16481660
else:
16491661
raise e
16501662

0 commit comments

Comments
 (0)