Skip to content

Commit 5242ff8

Browse files
girarda and sherifnada authored
low-code connectors: reset pagination between stream slices (#15330)
* reset pagination between stream slices
* Update airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
  Co-authored-by: Sherif A. Nada <[email protected]>
* Update airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
  Co-authored-by: Sherif A. Nada <[email protected]>
* patch
Co-authored-by: Sherif A. Nada <[email protected]>
1 parent c5c13f0 commit 5242ff8

File tree

12 files changed

+62
-8
lines changed

12 files changed

+62
-8
lines changed

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/limit_paginator.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ def get_request_body_json(
147147
) -> Mapping[str, Any]:
148148
return self._get_request_options(RequestOptionType.body_json)
149149

150+
def reset(self):
151+
self.pagination_strategy.reset()
152+
150153
def _get_request_options(self, option_type: RequestOptionType) -> Mapping[str, Any]:
151154
options = {}
152155
if self.page_token_option.inject_into == option_type:

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,3 +59,7 @@ def get_request_body_json(
5959

6060
def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Mapping[str, Any]:
6161
return {}
62+
63+
def reset(self):
64+
# No state to reset
65+
pass

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,12 @@ class Paginator(RequestOptionsProvider):
1919
If the next_page_token is the path to the next page of records, then it should be accessed through the `path` method
2020
"""
2121

22+
@abstractmethod
23+
def reset(self):
24+
"""
25+
Reset the pagination's inner state
26+
"""
27+
2228
@abstractmethod
2329
def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Mapping[str, Any]]:
2430
"""

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/cursor_pagination_strategy.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,7 @@ def next_page_token(self, response: requests.Response, last_records: List[Mappin
4646
return None
4747
token = self.cursor_value.eval(config=self.config, last_records=last_records, response=decoded_response)
4848
return token if token else None
49+
50+
def reset(self):
51+
# No state to reset
52+
pass

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/offset_increment.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,6 @@ def next_page_token(self, response: requests.Response, last_records: List[Mappin
3131
else:
3232
self._offset += len(last_records)
3333
return self._offset
34+
35+
def reset(self):
36+
self._offset = 0

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/page_increment.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,14 @@ class PageIncrement(PaginationStrategy, JsonSchemaMixin):
2323
options: InitVar[Mapping[str, Any]]
2424

2525
def __post_init__(self, options: Mapping[str, Any]):
26-
self._offset = 0
26+
self._page = 0
2727

2828
def next_page_token(self, response: requests.Response, last_records: List[Mapping[str, Any]]) -> Optional[Any]:
2929
if len(last_records) < self.page_size:
3030
return None
3131
else:
32-
self._offset += 1
33-
return self._offset
32+
self._page += 1
33+
return self._page
34+
35+
def reset(self):
36+
self._page = 0

airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/strategies/pagination_strategy.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,9 @@ def next_page_token(self, response: requests.Response, last_records: List[Mappin
2424
:return: next page token. Returns None if there are no more pages to fetch
2525
"""
2626
pass
27+
28+
@abstractmethod
29+
def reset(self):
30+
"""
31+
Reset the pagination's inner state
32+
"""

airbyte-cdk/python/airbyte_cdk/sources/declarative/retrievers/simple_retriever.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ def read_records(
342342
) -> Iterable[Mapping[str, Any]]:
343343
# Warning: use self.state instead of the stream_state passed as argument!
344344
stream_slice = stream_slice or {} # None-check
345+
self.paginator.reset()
345346
records_generator = HttpStream.read_records(self, sync_mode, cursor_field, stream_slice, self.state)
346347
for r in records_generator:
347348
self.stream_slicer.update_cursor(stream_slice, last_record=r)

airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_limit_paginator.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#
44

55
import json
6+
from unittest.mock import MagicMock
67

78
import pytest
89
import requests
@@ -159,3 +160,13 @@ def test_limit_cannot_be_set_in_path():
159160
assert False
160161
except ValueError:
161162
pass
163+
164+
165+
def test_reset():
166+
limit_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="limit", options={})
167+
page_token_request_option = RequestOption(inject_into=RequestOptionType.request_parameter, field_name="offset", options={})
168+
url_base = "https://airbyte.io"
169+
config = {}
170+
strategy = MagicMock()
171+
LimitPaginator(2, limit_request_option, page_token_request_option, strategy, config, url_base, options={}).reset()
172+
assert strategy.reset.called

airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_offset_increment.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,6 @@ def test_offset_increment_paginator_strategy(test_name, page_size, expected_next
3030
next_page_token = paginator_strategy.next_page_token(response, last_records)
3131
assert expected_next_page_token == next_page_token
3232
assert expected_offset == paginator_strategy._offset
33+
34+
paginator_strategy.reset()
35+
assert 0 == paginator_strategy._offset

airbyte-cdk/python/unit_tests/sources/declarative/requesters/paginators/test_page_increment.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
)
1919
def test_page_increment_paginator_strategy(test_name, page_size, expected_next_page_token, expected_offset):
2020
paginator_strategy = PageIncrement(page_size, options={})
21-
assert paginator_strategy._offset == 0
21+
assert paginator_strategy._page == 0
2222

2323
response = requests.Response()
2424

@@ -29,4 +29,7 @@ def test_page_increment_paginator_strategy(test_name, page_size, expected_next_p
2929

3030
next_page_token = paginator_strategy.next_page_token(response, last_records)
3131
assert expected_next_page_token == next_page_token
32-
assert expected_offset == paginator_strategy._offset
32+
assert expected_offset == paginator_strategy._page
33+
34+
paginator_strategy.reset()
35+
assert 0 == paginator_strategy._page

airbyte-cdk/python/unit_tests/sources/declarative/retrievers/test_simple_retriever.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
33
#
44

5-
from unittest.mock import MagicMock
5+
from unittest.mock import MagicMock, patch
66

77
import airbyte_cdk.sources.declarative.requesters.error_handlers.response_status as response_status
88
import pytest
@@ -15,13 +15,15 @@
1515
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
1616
from airbyte_cdk.sources.declarative.retrievers.simple_retriever import SimpleRetriever
1717
from airbyte_cdk.sources.streams.http.auth import NoAuth
18+
from airbyte_cdk.sources.streams.http.http import HttpStream
1819

1920
primary_key = "pk"
2021
records = [{"id": 1}, {"id": 2}]
2122
config = {}
2223

2324

24-
def test_simple_retriever_full():
25+
@patch.object(HttpStream, "read_records", return_value=[])
26+
def test_simple_retriever_full(mock_http_stream):
2527
requester = MagicMock()
2628
request_params = {"param": "value"}
2729
requester.get_request_params.return_value = request_params
@@ -53,6 +55,9 @@ def test_simple_retriever_full():
5355
backoff_time = 60
5456
should_retry = ResponseStatus.retry(backoff_time)
5557
requester.should_retry.return_value = should_retry
58+
request_body_json = {"body": "json"}
59+
requester.request_body_json.return_value = request_body_json
60+
5661
request_body_data = {"body": "data"}
5762
requester.get_request_body_data.return_value = request_body_data
5863
request_body_json = {"body": "json"}
@@ -92,12 +97,14 @@ def test_simple_retriever_full():
9297
assert not retriever.raise_on_http_errors
9398
assert retriever.should_retry(requests.Response())
9499
assert retriever.backoff_time(requests.Response()) == backoff_time
95-
assert retriever.request_body_data(None, None, None) == request_body_data
96100
assert retriever.request_body_json(None, None, None) == request_body_json
97101
assert retriever.request_kwargs(None, None, None) == request_kwargs
98102
assert retriever.cache_filename == cache_filename
99103
assert retriever.use_cache == use_cache
100104

105+
[r for r in retriever.read_records(SyncMode.full_refresh)]
106+
paginator.reset.assert_called()
107+
101108

102109
@pytest.mark.parametrize(
103110
"test_name, requester_response, expected_should_retry, expected_backoff_time",

0 commit comments

Comments
 (0)