Skip to content

Commit 0443db0

Browse files
committed
[ISSUE-32057] have events stream always be concurrent
1 parent a8722be commit 0443db0

16 files changed

+125 -84 lines changed

airbyte-integrations/connectors/source-stripe/source_stripe/run.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -16,10 +16,12 @@
1616
def _get_source(args: List[str]):
1717
catalog_path = AirbyteEntrypoint.extract_catalog(args)
1818
config_path = AirbyteEntrypoint.extract_config(args)
19+
state_path = AirbyteEntrypoint.extract_state(args)
1920
try:
2021
return SourceStripe(
2122
SourceStripe.read_catalog(catalog_path) if catalog_path else None,
2223
SourceStripe.read_config(config_path) if config_path else None,
24+
SourceStripe.read_state(state_path) if state_path else None,
2325
)
2426
except Exception as error:
2527
print(

airbyte-integrations/connectors/source-stripe/source_stripe/source.py

Lines changed: 52 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44

55
import logging
66
import os
7-
from datetime import timedelta
7+
from datetime import datetime, timedelta
88
from typing import Any, List, Mapping, MutableMapping, Optional, Tuple
99

1010
import pendulum
@@ -14,11 +14,14 @@
1414
from airbyte_cdk.models import ConfiguredAirbyteCatalog, FailureType
1515
from airbyte_cdk.sources.concurrent_source.concurrent_source import ConcurrentSource
1616
from airbyte_cdk.sources.concurrent_source.concurrent_source_adapter import ConcurrentSourceAdapter
17+
from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
1718
from airbyte_cdk.sources.message.repository import InMemoryMessageRepository
19+
from airbyte_cdk.sources.source import TState
1820
from airbyte_cdk.sources.streams import Stream
1921
from airbyte_cdk.sources.streams.call_rate import AbstractAPIBudget, HttpAPIBudget, HttpRequestMatcher, MovingWindowCallRatePolicy, Rate
2022
from airbyte_cdk.sources.streams.concurrent.adapters import StreamFacade
21-
from airbyte_cdk.sources.streams.concurrent.cursor import NoopCursor
23+
from airbyte_cdk.sources.streams.concurrent.cursor import Comparable, ConcurrentCursor, CursorField, NoopCursor
24+
from airbyte_cdk.sources.streams.concurrent.state_converters.datetime_stream_state_converter import EpochValueConcurrentStreamStateConverter
2225
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
2326
from airbyte_cdk.utils.traced_exception import AirbyteTracedException
2427
from airbyte_protocol.models import SyncMode
@@ -49,8 +52,11 @@
4952
class SourceStripe(ConcurrentSourceAdapter):
5053

5154
message_repository = InMemoryMessageRepository(entrypoint_logger.level)
55+
_SLICE_BOUNDARY_FIELDS_BY_IMPLEMENTATION = {
56+
Events: ("created[gte]", "created[lte]"),
57+
}
5258

53-
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], **kwargs):
59+
def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional[Mapping[str, Any]], state: TState, **kwargs):
5460
if config:
5561
concurrency_level = min(config.get("num_workers", _DEFAULT_CONCURRENCY), _MAX_CONCURRENCY)
5662
else:
@@ -60,6 +66,7 @@ def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional
6066
concurrency_level, concurrency_level // 2, logger, self._slice_logger, self.message_repository
6167
)
6268
super().__init__(concurrent_source)
69+
self._state = state
6370
if catalog:
6471
self._streams_configured_as_full_refresh = {
6572
configured_stream.stream.name
@@ -71,9 +78,8 @@ def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional
7178
self._streams_configured_as_full_refresh = set()
7279

7380
@staticmethod
74-
def validate_and_fill_with_defaults(config: MutableMapping) -> MutableMapping:
75-
start_date, lookback_window_days, slice_range = (
76-
config.get("start_date"),
81+
def validate_and_fill_with_defaults(config: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
82+
lookback_window_days, slice_range = (
7783
config.get("lookback_window_days"),
7884
config.get("slice_range"),
7985
)
@@ -86,9 +92,9 @@ def validate_and_fill_with_defaults(config: MutableMapping) -> MutableMapping:
8692
internal_message=message,
8793
failure_type=FailureType.config_error,
8894
)
89-
if start_date:
90-
# verifies the start_date is parseable
91-
SourceStripe._start_date_to_timestamp(start_date)
95+
96+
# verifies the start_date in the config is valid
97+
SourceStripe._start_date_to_timestamp(config)
9298
if slice_range is None:
9399
config["slice_range"] = 365
94100
elif not isinstance(slice_range, int) or slice_range < 1:
@@ -100,7 +106,7 @@ def validate_and_fill_with_defaults(config: MutableMapping) -> MutableMapping:
100106
)
101107
return config
102108

103-
def check_connection(self, logger: AirbyteLogger, config: Mapping[str, Any]) -> Tuple[bool, Any]:
109+
def check_connection(self, logger: AirbyteLogger, config: MutableMapping[str, Any]) -> Tuple[bool, Any]:
104110
self.validate_and_fill_with_defaults(config)
105111
stripe.api_key = config["client_secret"]
106112
try:
@@ -167,14 +173,11 @@ def get_api_call_budget(self, config: Mapping[str, Any]) -> AbstractAPIBudget:
167173

168174
return HttpAPIBudget(policies=policies)
169175

170-
def streams(self, config: Mapping[str, Any]) -> List[Stream]:
176+
def streams(self, config: MutableMapping[str, Any]) -> List[Stream]:
171177
config = self.validate_and_fill_with_defaults(config)
172178
authenticator = TokenAuthenticator(config["client_secret"])
173179

174-
if "start_date" in config:
175-
start_timestamp = self._start_date_to_timestamp(config["start_date"])
176-
else:
177-
start_timestamp = pendulum.datetime(2017, 1, 25).int_timestamp
180+
start_timestamp = self._start_date_to_timestamp(config)
178181
args = {
179182
"authenticator": authenticator,
180183
"account_id": config["account_id"],
@@ -511,21 +514,47 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
511514
),
512515
]
513516

514-
return [
515-
StreamFacade.create_from_stream(stream, self, entrypoint_logger, self._create_empty_state(), NoopCursor())
516-
if stream.name in self._streams_configured_as_full_refresh
517-
else stream
518-
for stream in streams
519-
]
517+
state_manager = ConnectorStateManager(stream_instance_map={s.name: s for s in streams}, state=self._state)
518+
return [self._to_concurrent(stream, self._start_date_to_timestamp(config), state_manager) for stream in streams]
519+
520+
def _to_concurrent(self, stream: Stream, fallback_start, state_manager: ConnectorStateManager) -> Stream:
521+
if os.environ.get("SKIP_CONCURRENCY"):
522+
return stream
523+
if stream.name in self._streams_configured_as_full_refresh:
524+
return StreamFacade.create_from_stream(stream, self, entrypoint_logger, self._create_empty_state(), NoopCursor())
525+
526+
state = state_manager.get_stream_state(stream.name, stream.namespace)
527+
slice_boundary_fields = self._SLICE_BOUNDARY_FIELDS_BY_IMPLEMENTATION.get(type(stream))
528+
if slice_boundary_fields:
529+
cursor_field = CursorField(stream.cursor_field) if isinstance(stream.cursor_field, str) else CursorField(stream.cursor_field[0])
530+
converter = EpochValueConcurrentStreamStateConverter()
531+
cursor = ConcurrentCursor(
532+
stream.name,
533+
stream.namespace,
534+
state_manager.get_stream_state(stream.name, stream.namespace),
535+
self.message_repository,
536+
state_manager,
537+
converter,
538+
cursor_field,
539+
slice_boundary_fields,
540+
fallback_start,
541+
)
542+
return StreamFacade.create_from_stream(stream, self, entrypoint_logger, state, cursor)
543+
544+
return stream
520545

521546
def _create_empty_state(self) -> MutableMapping[str, Any]:
522547
# The state is known to be empty because concurrent CDK is currently only used for full refresh
523548
return {}
524549

525550
@staticmethod
526-
def _start_date_to_timestamp(start_date: str) -> int:
551+
def _start_date_to_timestamp(config: Mapping[str, Any]) -> int:
552+
if "start_date" not in config:
553+
return pendulum.datetime(2017, 1, 25).int_timestamp # type: ignore # pendulum not typed
554+
555+
start_date = config["start_date"]
527556
try:
528-
return pendulum.parse(start_date).int_timestamp
557+
return pendulum.parse(start_date).int_timestamp # type: ignore # pendulum not typed
529558
except pendulum.parsing.exceptions.ParserError as e:
530559
message = f"Invalid start date {start_date}. Please use YYYY-MM-DDTHH:MM:SSZ format."
531560
raise AirbyteTracedException(

airbyte-integrations/connectors/source-stripe/unit_tests/conftest.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import pytest
88
from airbyte_cdk.sources.streams.http.auth import TokenAuthenticator
9+
from airbyte_cdk.test.state_builder import StateBuilder
910

1011
os.environ["CACHE_DISABLED"] = "true"
1112
os.environ["DEPLOYMENT_MODE"] = "testing"
@@ -40,7 +41,7 @@ def stream_by_name(config):
4041
from source_stripe.source import SourceStripe
4142

4243
def mocker(stream_name, source_config=config):
43-
source = SourceStripe(None, source_config)
44+
source = SourceStripe(None, source_config, StateBuilder().build())
4445
streams = source.streams(source_config)
4546
for stream in streams:
4647
if stream.name == stream_name:

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_application_fees.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
22

3-
import json
43
from datetime import datetime, timedelta, timezone
5-
from typing import Any, Dict, Optional
4+
from typing import Any, Dict, Optional, List
65
from unittest import TestCase
76

87
import freezegun
8+
from airbyte_cdk.sources.source import TState
99
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1010
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1111
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -19,7 +19,7 @@
1919
find_template,
2020
)
2121
from airbyte_cdk.test.state_builder import StateBuilder
22-
from airbyte_protocol.models import ConfiguredAirbyteCatalog, FailureType, SyncMode
22+
from airbyte_protocol.models import ConfiguredAirbyteCatalog, FailureType, SyncMode, AirbyteStateMessage
2323
from integration.config import ConfigBuilder
2424
from integration.pagination import StripePaginationStrategy
2525
from integration.request_builder import StripeRequestBuilder
@@ -55,8 +55,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
5555
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
5656

5757

58-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
59-
return SourceStripe(catalog, config)
58+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[List[AirbyteStateMessage]]) -> SourceStripe:
59+
return SourceStripe(catalog, config, state)
6060

6161

6262
def _an_event() -> RecordBuilder:
@@ -110,12 +110,12 @@ def _given_events_availability_check(http_mocker: HttpMocker) -> None:
110110
def _read(
111111
config_builder: ConfigBuilder,
112112
sync_mode: SyncMode,
113-
state: Optional[Dict[str, Any]] = None,
113+
state: Optional[List[AirbyteStateMessage]] = None,
114114
expecting_exception: bool = False
115115
) -> EntrypointOutput:
116116
catalog = _catalog(sync_mode)
117117
config = config_builder.build()
118-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
118+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
119119

120120

121121
@freezegun.freeze_time(_NOW.isoformat())
@@ -372,5 +372,5 @@ def test_given_state_earlier_than_30_days_when_read_then_query_events_using_type
372372
def _an_application_fee_event(self) -> RecordBuilder:
373373
return _an_event().with_field(_DATA_FIELD, _an_application_fee().build())
374374

375-
def _read(self, config: ConfigBuilder, state: Optional[Dict[str, Any]], expecting_exception: bool = False) -> EntrypointOutput:
375+
def _read(self, config: ConfigBuilder, state: Optional[List[AirbyteStateMessage]], expecting_exception: bool = False) -> EntrypointOutput:
376376
return _read(config, SyncMode.incremental, state, expecting_exception)

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_application_fees_refunds.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from unittest import TestCase
88

99
import freezegun
10+
from airbyte_cdk.sources.source import TState
1011
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1112
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1213
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -62,8 +63,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
6263
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
6364

6465

65-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
66-
return SourceStripe(catalog, config)
66+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[TState]) -> SourceStripe:
67+
return SourceStripe(catalog, config, state)
6768

6869

6970
def _an_event() -> RecordBuilder:
@@ -143,7 +144,7 @@ def _read(
143144
) -> EntrypointOutput:
144145
catalog = _catalog(sync_mode)
145146
config = config_builder.build()
146-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
147+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
147148

148149

149150
def _assert_not_available(output: EntrypointOutput) -> None:

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_authorizations.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
22

3-
import json
43
from datetime import datetime, timedelta, timezone
54
from typing import Any, Dict, Optional
65
from unittest import TestCase
76

87
import freezegun
8+
from airbyte_cdk.sources.source import TState
99
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1010
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1111
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -55,8 +55,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
5555
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
5656

5757

58-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
59-
return SourceStripe(catalog, config)
58+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[TState]) -> SourceStripe:
59+
return SourceStripe(catalog, config, state)
6060

6161

6262
def _an_event() -> RecordBuilder:
@@ -115,7 +115,7 @@ def _read(
115115
) -> EntrypointOutput:
116116
catalog = _catalog(sync_mode)
117117
config = config_builder.build()
118-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
118+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
119119

120120

121121
@freezegun.freeze_time(_NOW.isoformat())

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_bank_accounts.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
from unittest import TestCase
88

99
import freezegun
10+
from airbyte_cdk.sources.source import TState
1011
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1112
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1213
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -66,8 +67,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
6667
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
6768

6869

69-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
70-
return SourceStripe(catalog, config)
70+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[TState]) -> SourceStripe:
71+
return SourceStripe(catalog, config, state)
7172

7273

7374
def _an_event() -> RecordBuilder:
@@ -146,7 +147,7 @@ def _read(
146147
) -> EntrypointOutput:
147148
catalog = _catalog(sync_mode)
148149
config = config_builder.build()
149-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
150+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
150151

151152

152153
def _assert_not_available(output: EntrypointOutput) -> None:

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_cards.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
22

3-
import json
43
from datetime import datetime, timedelta, timezone
54
from typing import Any, Dict, Optional
65
from unittest import TestCase
76

87
import freezegun
8+
from airbyte_cdk.sources.source import TState
99
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1010
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1111
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -55,8 +55,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
5555
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
5656

5757

58-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
59-
return SourceStripe(catalog, config)
58+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[TState]) -> SourceStripe:
59+
return SourceStripe(catalog, config, state)
6060

6161

6262
def _an_event() -> RecordBuilder:
@@ -115,7 +115,7 @@ def _read(
115115
) -> EntrypointOutput:
116116
catalog = _catalog(sync_mode)
117117
config = config_builder.build()
118-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
118+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
119119

120120

121121
@freezegun.freeze_time(_NOW.isoformat())

airbyte-integrations/connectors/source-stripe/unit_tests/integration/test_early_fraud_warnings.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
11
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
22

3-
import json
43
from datetime import datetime, timedelta, timezone
54
from typing import Any, Dict, Optional
65
from unittest import TestCase
76

87
import freezegun
8+
from airbyte_cdk.sources.source import TState
99
from airbyte_cdk.test.catalog_builder import CatalogBuilder
1010
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput, read
1111
from airbyte_cdk.test.mock_http import HttpMocker, HttpRequest, HttpResponse
@@ -55,8 +55,8 @@ def _catalog(sync_mode: SyncMode) -> ConfiguredAirbyteCatalog:
5555
return CatalogBuilder().with_stream(_STREAM_NAME, sync_mode).build()
5656

5757

58-
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any]) -> SourceStripe:
59-
return SourceStripe(catalog, config)
58+
def _source(catalog: ConfiguredAirbyteCatalog, config: Dict[str, Any], state: Optional[TState]) -> SourceStripe:
59+
return SourceStripe(catalog, config, state)
6060

6161

6262
def _an_event() -> RecordBuilder:
@@ -115,7 +115,7 @@ def _read(
115115
) -> EntrypointOutput:
116116
catalog = _catalog(sync_mode)
117117
config = config_builder.build()
118-
return read(_source(catalog, config), config, catalog, state, expecting_exception)
118+
return read(_source(catalog, config, state), config, catalog, state, expecting_exception)
119119

120120

121121
@freezegun.freeze_time(_NOW.isoformat())

0 commit comments

Comments (0)