14
14
from airbyte_cdk .models import ConfiguredAirbyteCatalog , FailureType
15
15
from airbyte_cdk .sources .concurrent_source .concurrent_source import ConcurrentSource
16
16
from airbyte_cdk .sources .concurrent_source .concurrent_source_adapter import ConcurrentSourceAdapter
17
+ from airbyte_cdk .sources .connector_state_manager import ConnectorStateManager
17
18
from airbyte_cdk .sources .message .repository import InMemoryMessageRepository
19
+ from airbyte_cdk .sources .source import TState
18
20
from airbyte_cdk .sources .streams import Stream
19
21
from airbyte_cdk .sources .streams .call_rate import AbstractAPIBudget , HttpAPIBudget , HttpRequestMatcher , MovingWindowCallRatePolicy , Rate
20
22
from airbyte_cdk .sources .streams .concurrent .adapters import StreamFacade
21
- from airbyte_cdk .sources .streams .concurrent .cursor import NoopCursor
23
+ from airbyte_cdk .sources .streams .concurrent .cursor import Comparable , ConcurrentCursor , CursorField , NoopCursor
24
+ from airbyte_cdk .sources .streams .concurrent .state_converters .datetime_stream_state_converter import EpochValueConcurrentStreamStateConverter
22
25
from airbyte_cdk .sources .streams .http .auth import TokenAuthenticator
23
26
from airbyte_cdk .utils .traced_exception import AirbyteTracedException
24
27
from airbyte_protocol .models import SyncMode
42
45
_MAX_CONCURRENCY = 20
43
46
_DEFAULT_CONCURRENCY = 10
44
47
_CACHE_DISABLED = os .environ .get ("CACHE_DISABLED" )
48
+ _REFUND_STREAM_NAME = "refunds"
49
+ _INCREMENTAL_CONCURRENCY_EXCLUSION = {
50
+ _REFUND_STREAM_NAME , # excluded because of the upcoming changes in terms of cursor https://github.com/airbytehq/airbyte/issues/34332
51
+ }
45
52
USE_CACHE = not _CACHE_DISABLED
46
53
STRIPE_TEST_ACCOUNT_PREFIX = "sk_test_"
47
54
48
55
49
56
class SourceStripe (ConcurrentSourceAdapter ):
50
57
51
58
message_repository = InMemoryMessageRepository (entrypoint_logger .level )
59
+ _SLICE_BOUNDARY_FIELDS_BY_IMPLEMENTATION = {
60
+ Events : ("created[gte]" , "created[lte]" ),
61
+ CreatedCursorIncrementalStripeStream : ("created[gte]" , "created[lte]" ),
62
+ }
52
63
53
- def __init__ (self , catalog : Optional [ConfiguredAirbyteCatalog ], config : Optional [Mapping [str , Any ]], ** kwargs ):
64
+ def __init__ (self , catalog : Optional [ConfiguredAirbyteCatalog ], config : Optional [Mapping [str , Any ]], state : TState , ** kwargs ):
54
65
if config :
55
66
concurrency_level = min (config .get ("num_workers" , _DEFAULT_CONCURRENCY ), _MAX_CONCURRENCY )
56
67
else :
@@ -60,6 +71,7 @@ def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional
60
71
concurrency_level , concurrency_level // 2 , logger , self ._slice_logger , self .message_repository
61
72
)
62
73
super ().__init__ (concurrent_source )
74
+ self ._state = state
63
75
if catalog :
64
76
self ._streams_configured_as_full_refresh = {
65
77
configured_stream .stream .name
@@ -71,9 +83,8 @@ def __init__(self, catalog: Optional[ConfiguredAirbyteCatalog], config: Optional
71
83
self ._streams_configured_as_full_refresh = set ()
72
84
73
85
@staticmethod
74
- def validate_and_fill_with_defaults (config : MutableMapping ) -> MutableMapping :
75
- start_date , lookback_window_days , slice_range = (
76
- config .get ("start_date" ),
86
+ def validate_and_fill_with_defaults (config : MutableMapping [str , Any ]) -> MutableMapping [str , Any ]:
87
+ lookback_window_days , slice_range = (
77
88
config .get ("lookback_window_days" ),
78
89
config .get ("slice_range" ),
79
90
)
@@ -86,9 +97,9 @@ def validate_and_fill_with_defaults(config: MutableMapping) -> MutableMapping:
86
97
internal_message = message ,
87
98
failure_type = FailureType .config_error ,
88
99
)
89
- if start_date :
90
- # verifies the start_date is parseable
91
- SourceStripe ._start_date_to_timestamp (start_date )
100
+
101
+ # verifies the start_date in the config is valid
102
+ SourceStripe ._start_date_to_timestamp (config )
92
103
if slice_range is None :
93
104
config ["slice_range" ] = 365
94
105
elif not isinstance (slice_range , int ) or slice_range < 1 :
@@ -100,7 +111,7 @@ def validate_and_fill_with_defaults(config: MutableMapping) -> MutableMapping:
100
111
)
101
112
return config
102
113
103
- def check_connection (self , logger : AirbyteLogger , config : Mapping [str , Any ]) -> Tuple [bool , Any ]:
114
+ def check_connection (self , logger : AirbyteLogger , config : MutableMapping [str , Any ]) -> Tuple [bool , Any ]:
104
115
self .validate_and_fill_with_defaults (config )
105
116
stripe .api_key = config ["client_secret" ]
106
117
try :
@@ -167,14 +178,11 @@ def get_api_call_budget(self, config: Mapping[str, Any]) -> AbstractAPIBudget:
167
178
168
179
return HttpAPIBudget (policies = policies )
169
180
170
- def streams (self , config : Mapping [str , Any ]) -> List [Stream ]:
181
+ def streams (self , config : MutableMapping [str , Any ]) -> List [Stream ]:
171
182
config = self .validate_and_fill_with_defaults (config )
172
183
authenticator = TokenAuthenticator (config ["client_secret" ])
173
184
174
- if "start_date" in config :
175
- start_timestamp = self ._start_date_to_timestamp (config ["start_date" ])
176
- else :
177
- start_timestamp = pendulum .datetime (2017 , 1 , 25 ).int_timestamp
185
+ start_timestamp = self ._start_date_to_timestamp (config )
178
186
args = {
179
187
"authenticator" : authenticator ,
180
188
"account_id" : config ["account_id" ],
@@ -289,7 +297,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
289
297
# The Refunds stream does not utilize the Events API as it created issues with data loss during the incremental syncs.
290
298
# Therefore, we're using the regular API with the `created` cursor field. A bug has been filed with Stripe.
291
299
# See more at https://github.com/airbytehq/oncall/issues/3090, https://github.com/airbytehq/oncall/issues/3428
292
- CreatedCursorIncrementalStripeStream (name = "refunds" , path = "refunds" , ** incremental_args ),
300
+ CreatedCursorIncrementalStripeStream (name = _REFUND_STREAM_NAME , path = "refunds" , ** incremental_args ),
293
301
UpdatedCursorIncrementalStripeStream (
294
302
name = "payment_methods" ,
295
303
path = "payment_methods" ,
@@ -511,21 +519,44 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]:
511
519
),
512
520
]
513
521
514
- return [
515
- StreamFacade .create_from_stream (stream , self , entrypoint_logger , self ._create_empty_state (), NoopCursor ())
516
- if stream .name in self ._streams_configured_as_full_refresh
517
- else stream
518
- for stream in streams
519
- ]
522
+ state_manager = ConnectorStateManager (stream_instance_map = {s .name : s for s in streams }, state = self ._state )
523
+ return [self ._to_concurrent (stream , self ._start_date_to_timestamp (config ), state_manager ) for stream in streams ]
524
+
525
def _to_concurrent(self, stream: Stream, fallback_start: int, state_manager: ConnectorStateManager) -> Stream:
    """Adapt *stream* for the concurrent CDK when it is safe to do so.

    Streams configured as full refresh in the catalog are wrapped in a
    ``StreamFacade`` with an empty state and a no-op cursor. Incremental
    streams whose implementation declares slice-boundary fields in
    ``_SLICE_BOUNDARY_FIELDS_BY_IMPLEMENTATION`` (and that are not listed in
    ``_INCREMENTAL_CONCURRENCY_EXCLUSION``) get a ``ConcurrentCursor`` backed
    by an epoch-seconds state converter. Any other stream is returned as-is
    and runs synchronously.

    :param stream: the synchronous stream to adapt
    :param fallback_start: epoch timestamp used as the cursor's start when no
        state exists (derived from the config's start_date)
    :param state_manager: connector-level state manager shared across streams
    :return: the (possibly wrapped) stream
    """
    if stream.name in self._streams_configured_as_full_refresh:
        return StreamFacade.create_from_stream(stream, self, entrypoint_logger, self._create_empty_state(), NoopCursor())

    state = state_manager.get_stream_state(stream.name, stream.namespace)
    slice_boundary_fields = self._SLICE_BOUNDARY_FIELDS_BY_IMPLEMENTATION.get(type(stream))
    if slice_boundary_fields and stream.name not in _INCREMENTAL_CONCURRENCY_EXCLUSION:
        # cursor_field may be declared as a plain string or a field path (list);
        # only the first path component is used for the concurrent cursor.
        cursor_field = CursorField(stream.cursor_field) if isinstance(stream.cursor_field, str) else CursorField(stream.cursor_field[0])
        converter = EpochValueConcurrentStreamStateConverter()
        cursor = ConcurrentCursor(
            stream.name,
            stream.namespace,
            state,  # reuse the state fetched above instead of querying the manager a second time
            self.message_repository,
            state_manager,
            converter,
            cursor_field,
            slice_boundary_fields,
            fallback_start,
        )
        return StreamFacade.create_from_stream(stream, self, entrypoint_logger, state, cursor)

    return stream
520
548
521
549
def _create_empty_state (self ) -> MutableMapping [str , Any ]:
522
- # The state is known to be empty because concurrent CDK is currently only used for full refresh
523
550
return {}
524
551
525
552
@staticmethod
526
- def _start_date_to_timestamp (start_date : str ) -> int :
553
+ def _start_date_to_timestamp (config : Mapping [str , Any ]) -> int :
554
+ if "start_date" not in config :
555
+ return pendulum .datetime (2017 , 1 , 25 ).int_timestamp # type: ignore # pendulum not typed
556
+
557
+ start_date = config ["start_date" ]
527
558
try :
528
- return pendulum .parse (start_date ).int_timestamp
559
+ return pendulum .parse (start_date ).int_timestamp # type: ignore # pendulum not typed
529
560
except pendulum .parsing .exceptions .ParserError as e :
530
561
message = f"Invalid start date { start_date } . Please use YYYY-MM-DDTHH:MM:SSZ format."
531
562
raise AirbyteTracedException (
0 commit comments