|
2 | 2 | # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
|
3 | 3 | #
|
4 | 4 |
|
5 |
| -from unittest.mock import patch |
| 5 | +from unittest.mock import MagicMock, patch |
6 | 6 |
|
7 |
| -from airbyte_cdk.models import SyncMode |
8 |
| -from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import StreamSlice |
| 7 | +from airbyte_cdk.models import ( |
| 8 | + AirbyteStateBlob, |
| 9 | + AirbyteStateMessage, |
| 10 | + AirbyteStateType, |
| 11 | + AirbyteStream, |
| 12 | + AirbyteStreamState, |
| 13 | + ConfiguredAirbyteCatalog, |
| 14 | + ConfiguredAirbyteStream, |
| 15 | + DestinationSyncMode, |
| 16 | + StreamDescriptor, |
| 17 | + SyncMode, |
| 18 | +) |
| 19 | +from airbyte_cdk.sources.declarative.incremental.per_partition_cursor import PerPartitionCursor, StreamSlice |
9 | 20 | from airbyte_cdk.sources.declarative.manifest_declarative_source import ManifestDeclarativeSource
|
10 | 21 | from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
|
11 | 22 | from airbyte_cdk.sources.types import Record
|
@@ -268,3 +279,93 @@ def test_substream_without_input_state():
|
268 | 279 | cursor_slice={"start_time": "2022-02-01", "end_time": "2022-02-28"},
|
269 | 280 | ),
|
270 | 281 | ]
|
| 282 | + |
| 283 | + |
def test_partition_limitation():
    """
    Read the "Rates" stream over three list partitions while `PerPartitionCursor.DEFAULT_MAX_PARTITIONS_NUMBER`
    is patched down to 2, then check the last state message that was emitted.

    The assertion expects that last state to list only partitions "2" and "3" (presumably because partition "1"
    is dropped once the patched limit is exceeded; confirm against PerPartitionCursor).
    """
    partition_key = "partition_field"
    partition_values = ["1", "2", "3"]

    manifest_builder = ManifestBuilder().with_list_partition_router("Rates", partition_key, partition_values)
    manifest_builder = manifest_builder.with_incremental_sync(
        "Rates",
        start_datetime="2022-01-01",
        end_datetime="2022-02-28",
        datetime_format="%Y-%m-%d",
        cursor_field=CURSOR_FIELD,
        step="P1M",
        cursor_granularity="P1D",
    )
    source = ManifestDeclarativeSource(source_config=manifest_builder.build())

    slice_1, slice_2, slice_3 = [StreamSlice(partition={partition_key: value}, cursor_slice={}) for value in partition_values]

    def rate_record(cursor_value, owning_slice):
        return Record({"a record key": "a record value", CURSOR_FIELD: cursor_value}, owning_slice)

    # The mock hands out one entry per `_read_pages` call, in this order
    # (presumably one call per partition and monthly cursor slice).
    pages = [
        [rate_record("2022-01-15", slice_1), rate_record("2022-01-16", slice_1)],
        [rate_record("2022-02-15", slice_1)],
        [rate_record("2022-01-16", slice_2)],
        [],
        [],
        [rate_record("2022-02-17", slice_3)],
    ]

    rates_stream = AirbyteStream(name="Rates", json_schema={}, supported_sync_modes=[SyncMode.full_refresh, SyncMode.incremental])
    catalog = ConfiguredAirbyteCatalog(
        streams=[
            ConfiguredAirbyteStream(
                stream=rates_stream,
                sync_mode=SyncMode.incremental,
                destination_sync_mode=DestinationSyncMode.append,
            )
        ]
    )

    # NOTE(review): this state is addressed to "post_comment_votes" while the configured stream is "Rates";
    # confirm whether the mismatch is intentional before relying on the incoming cursors below.
    incoming_cursors = (("1", "2022-01-01"), ("2", "2022-01-02"), ("3", "2022-01-03"))
    incoming_state_blob = AirbyteStateBlob.parse_obj(
        {
            "states": [
                {"partition": {partition_key: value}, "cursor": {CURSOR_FIELD: cursor_value}}
                for value, cursor_value in incoming_cursors
            ]
        }
    )
    initial_state = [
        AirbyteStateMessage(
            type=AirbyteStateType.STREAM,
            stream=AirbyteStreamState(
                stream_descriptor=StreamDescriptor(name="post_comment_votes", namespace=None),
                stream_state=incoming_state_blob,
            ),
        )
    ]

    read_pages_patch = patch.object(SimpleRetriever, "_read_pages", side_effect=pages)
    partition_limit_patch = patch.object(PerPartitionCursor, "DEFAULT_MAX_PARTITIONS_NUMBER", 2)
    with read_pages_patch, partition_limit_patch:
        output = list(source.read(MagicMock(), {}, catalog, initial_state))

    expected_last_state = {
        "states": [
            {"partition": {partition_key: "2"}, "cursor": {CURSOR_FIELD: "2022-01-16"}},
            {"partition": {partition_key: "3"}, "cursor": {CURSOR_FIELD: "2022-02-17"}},
        ]
    }
    emitted_states = [message.state.stream.stream_state.dict() for message in output if message.state]
    assert emitted_states[-1] == expected_last_state
0 commit comments