airbytehq · girarda · May 15, 2024 · Mar 22, 2024 · Mar 22, 2024 · Mar 22, 2024
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte-cdk/python/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -353,11 +353,12 @@ definitions:
         interpolation_context:
           - config
           - headers
-          - last_records
+          - last_page_size
+          - last_record
           - response
         examples:
           - "{{ headers.link.next.cursor }}"
-          - "{{ last_records[-1]['key'] }}"
+          - "{{ last_record['key'] }}"
           - "{{ response['nextPage'] }}"
       page_size:
         title: Page Size
@@ -372,7 +373,7 @@ definitions:
         interpolation_context:
           - config
           - headers
-          - last_records
+          - last_record
           - response
         examples:
           - "{{ response.data.has_more is false }}"
@@ -2306,20 +2307,20 @@ interpolation:
           x-ratelimit-limit: "600"
           x-ratelimit-remaining: "598"
           x-ratelimit-reset: "39"
-    - title: last_records
-      description: List of records extracted from the last response received from the API.
-      type: list
+    - title: last_record
+      description: Last record extracted from the response received from the API.
+      type: object
+      examples:
+        - name: "Test List: 19"
+          id: 0236d6d2
+          contact_count: 20
+          _metadata:
+            self: https://api.sendgrid.com/v3/marketing/lists/0236d6d2
+    - title: last_page_size
+      description: Number of records extracted from the last response received from the API.
+      type: integer
       examples:
-        - - name: "Test List: 19"
-            id: 0236d6d2
-            contact_count: 20
-            _metadata:
-              self: https://api.sendgrid.com/v3/marketing/lists/0236d6d2
-          - name: List for CI tests, number 30
-            id: 041ee031
-            contact_count: 0
-            _metadata:
-              self: https://api.sendgrid.com/v3/marketing/lists/041ee031
+        - 2
     - title: next_page_token
       description: Object describing the token to fetch the next page of records. The object has a single key "next_page_token".
       type: object

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/dpath_extractor.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, Union
+from typing import Any, Iterable, List, Mapping, Union
 
 import dpath.util
 import requests
@@ -58,24 +58,22 @@ class DpathExtractor(RecordExtractor):
     parameters: InitVar[Mapping[str, Any]]
     decoder: Decoder = JsonDecoder(parameters={})
 
-    def __post_init__(self, parameters: Mapping[str, Any]):
-        for path_index in range(len(self.field_path)):
-            if isinstance(self.field_path[path_index], str):
-                self.field_path[path_index] = InterpolatedString.create(self.field_path[path_index], parameters=parameters)
+    def __post_init__(self, parameters: Mapping[str, Any]) -> None:
+        self._field_path = [InterpolatedString.create(path, parameters=parameters) for path in self.field_path]
 
-    def extract_records(self, response: requests.Response) -> List[Mapping[str, Any]]:
+    def extract_records(self, response: requests.Response) -> Iterable[Mapping[str, Any]]:
+        """Lazily yield the records located at the configured field path of the decoded response."""
         response_body = self.decoder.decode(response)
-        if len(self.field_path) == 0:
+        if not self._field_path:
             extracted = response_body
         else:
-            path = [path.eval(self.config) for path in self.field_path]
+            path = [path.eval(self.config) for path in self._field_path]
             if "*" in path:
                 extracted = dpath.util.values(response_body, path)
             else:
                 extracted = dpath.util.get(response_body, path, default=[])
         if isinstance(extracted, list):
-            return extracted
+            yield from extracted
         elif extracted:
-            return [extracted]
+            yield extracted
-        else:
-            return []
+        # Any other falsy value yields nothing: the generator simply ends.
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/http_selector.py
@@ -4,7 +4,7 @@
 
 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any, List, Mapping, Optional
+from typing import Any, Iterable, List, Mapping, Optional
 
 import requests
 from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState
@@ -25,7 +25,7 @@ def select_records(
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> List[Record]:
+    ) -> Iterable[Record]:
         """
         Selects records from the response
         :param response: The response to select the records from

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_extractor.py
@@ -4,7 +4,7 @@
 
 from abc import abstractmethod
 from dataclasses import dataclass
-from typing import Any, List, Mapping
+from typing import Any, Iterable, List, Mapping
 
 import requests
 
@@ -19,7 +19,7 @@ class RecordExtractor:
     def extract_records(
         self,
         response: requests.Response,
-    ) -> List[Mapping[str, Any]]:
+    ) -> Iterable[Mapping[str, Any]]:
         """
         Selects records from the response
         :param response: The response to extract the records from

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_filter.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass
-from typing import Any, List, Mapping, Optional
+from typing import Any, Iterable, List, Mapping, Optional
 
 from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
 from airbyte_cdk.sources.declarative.types import Config, StreamSlice, StreamState
@@ -27,10 +27,12 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
 
     def filter_records(
         self,
-        records: List[Mapping[str, Any]],
+        records: Iterable[Mapping[str, Any]],
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> List[Mapping[str, Any]]:
+    ) -> Iterable[Mapping[str, Any]]:
         kwargs = {"stream_state": stream_state, "stream_slice": stream_slice, "next_page_token": next_page_token}
-        return [record for record in records if self._filter_interpolator.eval(self.config, record=record, **kwargs)]
+        for record in records:
+            if self._filter_interpolator.eval(self.config, record=record, **kwargs):
+                yield record
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py
@@ -3,7 +3,7 @@
 #
 
 from dataclasses import InitVar, dataclass, field
-from typing import Any, List, Mapping, Optional
+from typing import Any, Iterable, List, Mapping, Optional
 
 import requests
 from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
@@ -50,7 +50,7 @@ def select_records(
         records_schema: Mapping[str, Any],
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> List[Record]:
+    ) -> Iterable[Record]:
         """
         Selects records from the response
         :param response: The response to select the records from
@@ -60,38 +60,47 @@ def select_records(
         :param next_page_token: The paginator token
-        :return: List of Records selected from the response
+        :return: Iterable of Records selected from the response
         """
-        all_data = self.extractor.extract_records(response)
+        all_data: Iterable[Mapping[str, Any]] = self.extractor.extract_records(response)
         filtered_data = self._filter(all_data, stream_state, stream_slice, next_page_token)
-        self._transform(filtered_data, stream_state, stream_slice)
-        self._normalize_by_schema(filtered_data, schema=records_schema)
-        return [Record(data, stream_slice) for data in filtered_data]
+        transformed_data = self._transform(filtered_data, stream_state, stream_slice)
+        normalized_data = self._normalize_by_schema(transformed_data, schema=records_schema)
+        for data in normalized_data:
+            yield Record(data, stream_slice)
 
-    def _normalize_by_schema(self, records: List[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]) -> List[Mapping[str, Any]]:
+    def _normalize_by_schema(
+        self, records: Iterable[Mapping[str, Any]], schema: Optional[Mapping[str, Any]]
+    ) -> Iterable[Mapping[str, Any]]:
         if schema:
             # record has type Mapping[str, Any], but dict[str, Any] expected
-            return [self.schema_normalization.transform(record, schema) for record in records]  # type: ignore
-        return records
+            for record in records:
+                normalized_record = dict(record)
+                self.schema_normalization.transform(normalized_record, schema)
+                yield normalized_record
+        else:
+            yield from records
 
     def _filter(
         self,
-        records: List[Mapping[str, Any]],
+        records: Iterable[Mapping[str, Any]],
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice],
         next_page_token: Optional[Mapping[str, Any]],
-    ) -> List[Mapping[str, Any]]:
+    ) -> Iterable[Mapping[str, Any]]:
         if self.record_filter:
-            return self.record_filter.filter_records(
+            yield from self.record_filter.filter_records(
                 records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token
             )
-        return records
+        else:
+            yield from records
 
     def _transform(
         self,
-        records: List[Mapping[str, Any]],
+        records: Iterable[Mapping[str, Any]],
         stream_state: StreamState,
         stream_slice: Optional[StreamSlice] = None,
-    ) -> None:
+    ) -> Iterable[Mapping[str, Any]]:
         for record in records:
             for transformation in self.transformations:
                 # record has type Mapping[str, Any], but Record expected
                 transformation.transform(record, config=self.config, stream_state=stream_state, stream_slice=stream_slice)  # type: ignore
+            yield record
diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/models/declarative_component_schema.py
@@ -880,7 +880,7 @@ class CursorPagination(BaseModel):
         description='Value of the cursor defining the next page to fetch.',
         examples=[
             '{{ headers.link.next.cursor }}',
-            "{{ last_records[-1]['key'] }}",
+            "{{ last_record['key'] }}",
             "{{ response['nextPage'] }}",
         ],
         title='Cursor Value',

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/default_paginator.py
@@ -101,8 +101,10 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
             self.url_base = InterpolatedString(string=self.url_base, parameters=parameters)
         self._token = self.pagination_strategy.initial_token
 
-    def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Optional[Mapping[str, Any]]:
-        self._token = self.pagination_strategy.next_page_token(response, last_records)
+    def next_page_token(
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
+    ) -> Optional[Mapping[str, Any]]:
+        self._token = self.pagination_strategy.next_page_token(response, last_page_size, last_record)
         if self._token:
             return {"next_page_token": self._token}
         else:
@@ -164,9 +166,9 @@ def _get_request_options(self, option_type: RequestOptionType) -> MutableMapping
             and isinstance(self.page_token_option, RequestOption)
             and self.page_token_option.inject_into == option_type
         ):
-            options[self.page_token_option.field_name.eval(config=self.config)] = self._token
+            options[self.page_token_option.field_name.eval(config=self.config)] = self._token  # type: ignore # field_name is known to be an interpolated string
         if self.page_size_option and self.pagination_strategy.get_page_size() and self.page_size_option.inject_into == option_type:
-            options[self.page_size_option.field_name.eval(config=self.config)] = self.pagination_strategy.get_page_size()
+            options[self.page_size_option.field_name.eval(config=self.config)] = self.pagination_strategy.get_page_size()  # type: ignore # field_name is known to be an interpolated string
         return options
 
 
@@ -185,12 +187,14 @@ def __init__(self, decorated: Paginator, maximum_number_of_pages: int = 5) -> No
         self._decorated = decorated
         self._page_count = self._PAGE_COUNT_BEFORE_FIRST_NEXT_CALL
 
-    def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Optional[Mapping[str, Any]]:
+    def next_page_token(
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
+    ) -> Optional[Mapping[str, Any]]:
         if self._page_count >= self._maximum_number_of_pages:
             return None
 
         self._page_count += 1
-        return self._decorated.next_page_token(response, last_records)
+        return self._decorated.next_page_token(response, last_page_size, last_record)
 
     def path(self) -> Optional[str]:
         return self._decorated.path()
@@ -201,7 +205,7 @@ def get_request_params(
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> MutableMapping[str, Any]:
+    ) -> Mapping[str, Any]:
         return self._decorated.get_request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_headers(
@@ -219,7 +223,7 @@ def get_request_body_data(
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Union[Mapping[str, Any], str]]:
+    ) -> Union[Mapping[str, Any], str]:
         return self._decorated.get_request_body_data(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def get_request_body_json(
@@ -228,7 +232,7 @@ def get_request_body_json(
         stream_state: Optional[StreamState] = None,
         stream_slice: Optional[StreamSlice] = None,
         next_page_token: Optional[Mapping[str, Any]] = None,
-    ) -> Optional[Mapping[str, Any]]:
+    ) -> Mapping[str, Any]:
         return self._decorated.get_request_body_json(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token)
 
     def reset(self) -> None:

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/no_pagination.py
@@ -57,7 +57,7 @@ def get_request_body_json(
     ) -> Mapping[str, Any]:
         return {}
 
-    def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Mapping[str, Any]:
+    def next_page_token(self, response: requests.Response, last_page_size: int, last_record: Optional[Record]) -> Mapping[str, Any]:
         return {}
 
     def reset(self) -> None:

diff --git a/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py b/airbyte-cdk/python/airbyte_cdk/sources/declarative/requesters/paginators/paginator.py
@@ -27,12 +27,15 @@ def reset(self) -> None:
         """
 
     @abstractmethod
-    def next_page_token(self, response: requests.Response, last_records: List[Record]) -> Optional[Mapping[str, Any]]:
+    def next_page_token(
+        self, response: requests.Response, last_page_size: int, last_record: Optional[Record]
+    ) -> Optional[Mapping[str, Any]]:
         """
         Returns the next_page_token to use to fetch the next page of records.
 
         :param response: the response to process
-        :param last_records: the records extracted from the response
+        :param last_page_size: the number of records read from the response
+        :param last_record: the last record extracted from the response
         :return: A mapping {"next_page_token": <token>} for the next page from the input response object. Returning None means there are no more pages to read in this response.
         """
         pass