[airbyte-cdk] Decouple request_options_provider from datetime_based_cursor + concurrent_cursor features for low-code #45413

Merged · 4 commits · Sep 17, 2024
File: airbyte_cdk/sources/declarative/parsers/model_to_component_factory.py

@@ -140,7 +140,12 @@
StopConditionPaginationStrategyDecorator,
)
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import (
DatetimeBasedRequestOptionsProvider,
DefaultRequestOptionsProvider,
InterpolatedRequestOptionsProvider,
RequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_path import RequestPath
from airbyte_cdk.sources.declarative.requesters.requester import HttpMethod
from airbyte_cdk.sources.declarative.retrievers import AsyncRetriever, SimpleRetriever, SimpleRetrieverTestReadDecorator
@@ -653,6 +658,40 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi
"per_partition_cursor": combined_slicers if isinstance(combined_slicers, PerPartitionCursor) else None,
"is_global_substream_cursor": isinstance(combined_slicers, GlobalSubstreamCursor),
}

if model.incremental_sync and isinstance(model.incremental_sync, DatetimeBasedCursorModel):
cursor_model = model.incremental_sync

end_time_option = (
RequestOption(
inject_into=RequestOptionType(cursor_model.end_time_option.inject_into.value),
field_name=cursor_model.end_time_option.field_name,
parameters=cursor_model.parameters or {},
)
if cursor_model.end_time_option
else None
)
start_time_option = (
RequestOption(
inject_into=RequestOptionType(cursor_model.start_time_option.inject_into.value),
field_name=cursor_model.start_time_option.field_name,
parameters=cursor_model.parameters or {},
)
if cursor_model.start_time_option
else None
)

request_options_provider = DatetimeBasedRequestOptionsProvider(
start_time_option=start_time_option,
end_time_option=end_time_option,
partition_field_start=cursor_model.partition_field_start,
partition_field_end=cursor_model.partition_field_end,
config=config,
parameters=model.parameters or {},
)
else:
request_options_provider = None

transformations = []
if model.transformations:
for transformation_model in model.transformations:
@@ -663,6 +702,7 @@ def create_declarative_stream(self, model: DeclarativeStreamModel, config: Confi
name=model.name,
primary_key=primary_key,
stream_slicer=combined_slicers,
request_options_provider=request_options_provider,
stop_condition_on_cursor=stop_condition_on_cursor,
client_side_incremental_sync=client_side_incremental_sync,
transformations=transformations,
@@ -1126,6 +1166,7 @@ def create_simple_retriever(
name: str,
primary_key: Optional[Union[str, List[str], List[List[str]]]],
stream_slicer: Optional[StreamSlicer],
request_options_provider: Optional[RequestOptionsProvider] = None,
stop_condition_on_cursor: bool = False,
client_side_incremental_sync: Optional[Dict[str, Any]] = None,
transformations: List[RecordTransformation],
@@ -1140,11 +1181,19 @@
client_side_incremental_sync=client_side_incremental_sync,
)
url_base = model.requester.url_base if hasattr(model.requester, "url_base") else requester.get_url_base()

# Define cursor only if per partition or common incremental support is needed
cursor = stream_slicer if isinstance(stream_slicer, DeclarativeCursor) else None

if not stream_slicer or type(stream_slicer) is not DatetimeBasedCursor:
# Many of the custom component implementations of DatetimeBasedCursor override get_request_params() (or other methods).
# Because we're decoupling RequestOptionsProvider from the Cursor, custom components will eventually need to reimplement
# their own RequestOptionsProvider. However, right now the existing StreamSlicer/Cursor can still act as the SimpleRetriever's
# request_options_provider
request_options_provider = stream_slicer or DefaultRequestOptionsProvider(config=config, parameters={})

stream_slicer = stream_slicer or SinglePartitionRouter(parameters={})

cursor_used_for_stop_condition = cursor if stop_condition_on_cursor else None
paginator = (
self._create_component_from_model(
@@ -1168,6 +1217,7 @@
requester=requester,
record_selector=record_selector,
stream_slicer=stream_slicer,
request_option_provider=request_options_provider,
cursor=cursor,
config=config,
maximum_number_of_slices=self._limit_slices_fetched or 5,
@@ -1181,6 +1231,7 @@
requester=requester,
record_selector=record_selector,
stream_slicer=stream_slicer,
request_option_provider=request_options_provider,
cursor=cursor,
config=config,
ignore_stream_slicer_parameters_on_paginated_requests=ignore_stream_slicer_parameters_on_paginated_requests,
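The dispatch rule above can be illustrated with a standalone sketch (hypothetical stand-in classes, not CDK code): only an exact DatetimeBasedCursor defers to the decoupled provider, while subclasses, which may override the request methods, keep serving as their own provider.

# Standalone illustration with stand-in classes; not CDK code.
class DatetimeBasedCursor: ...
class CustomDatetimeCursor(DatetimeBasedCursor): ...  # may override get_request_params()

def pick_request_options_provider(stream_slicer, decoupled_provider, default_provider):
    # Exact type match: the plain cursor hands request options off to the
    # decoupled DatetimeBasedRequestOptionsProvider built by the factory.
    if stream_slicer is not None and type(stream_slicer) is DatetimeBasedCursor:
        return decoupled_provider
    # Subclasses (custom components) keep acting as their own provider;
    # with no slicer at all, fall back to the no-op default provider.
    return stream_slicer or default_provider

assert pick_request_options_provider(DatetimeBasedCursor(), "decoupled", "default") == "decoupled"
custom = CustomDatetimeCursor()
assert pick_request_options_provider(custom, "decoupled", "default") is custom
assert pick_request_options_provider(None, "decoupled", "default") == "default"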
File: airbyte_cdk/sources/declarative/requesters/request_options/__init__.py
@@ -2,9 +2,13 @@
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

from airbyte_cdk.sources.declarative.requesters.request_options.datetime_based_request_options_provider import (
DatetimeBasedRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_options.default_request_options_provider import DefaultRequestOptionsProvider
from airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_options_provider import (
InterpolatedRequestOptionsProvider,
)
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider

__all__ = ["InterpolatedRequestOptionsProvider", "RequestOptionsProvider"]
__all__ = ["DatetimeBasedRequestOptionsProvider", "DefaultRequestOptionsProvider", "InterpolatedRequestOptionsProvider", "RequestOptionsProvider"]
Contributor:
nit: should we add DefaultRequestOptionsProvider to the CDK's top-level __init__.py?

Contributor (Author):
yep, will add!

File: airbyte_cdk/sources/declarative/requesters/request_options/datetime_based_request_options_provider.py
@@ -0,0 +1,84 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

from dataclasses import InitVar, dataclass
from typing import Any, Iterable, Mapping, MutableMapping, Optional, Union

from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState


@dataclass
class DatetimeBasedRequestOptionsProvider(RequestOptionsProvider):
"""
Request options provider that extracts fields from the stream_slice and injects them into the respective location in the
outbound request being made
"""

config: Config
parameters: InitVar[Mapping[str, Any]]
start_time_option: Optional[RequestOption] = None
end_time_option: Optional[RequestOption] = None
partition_field_start: Optional[str] = None
partition_field_end: Optional[str] = None

def __post_init__(self, parameters: Mapping[str, Any]) -> None:
self._partition_field_start = InterpolatedString.create(self.partition_field_start or "start_time", parameters=parameters)
self._partition_field_end = InterpolatedString.create(self.partition_field_end or "end_time", parameters=parameters)

def stream_slices(self) -> Iterable[StreamSlice]:
# When all processing is managed by the ConcurrentCursor and concurrent read processing, this method shouldn't end up being used.
# It kinda sucks that we have to implement this given that partition generation should really just be a responsibility of the
# cursor, but making this class implement StreamSlicer makes it easier in the model_to_component_factory
yield from [{}]

def get_request_params(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return self._get_request_options(RequestOptionType.request_parameter, stream_slice)

def get_request_headers(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return self._get_request_options(RequestOptionType.header, stream_slice)

def get_request_body_data(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Union[Mapping[str, Any], str]:
return self._get_request_options(RequestOptionType.body_data, stream_slice)

def get_request_body_json(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return self._get_request_options(RequestOptionType.body_json, stream_slice)

def _get_request_options(self, option_type: RequestOptionType, stream_slice: Optional[StreamSlice]) -> Mapping[str, Any]:
options: MutableMapping[str, Any] = {}
if not stream_slice:
return options
if self.start_time_option and self.start_time_option.inject_into == option_type:
options[self.start_time_option.field_name.eval(config=self.config)] = stream_slice.get( # type: ignore # field_name is always casted to an interpolated string
self._partition_field_start.eval(self.config)
)
if self.end_time_option and self.end_time_option.inject_into == option_type:
options[self.end_time_option.field_name.eval(config=self.config)] = stream_slice.get(self._partition_field_end.eval(self.config)) # type: ignore # field_name is always casted to an interpolated string
return options
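
A minimal usage sketch of the provider follows; the field names and slice values are assumptions for illustration, not taken from the PR:

# Illustrative usage; parameter names like "created[gte]" are invented.
from airbyte_cdk.sources.declarative.requesters.request_option import RequestOption, RequestOptionType
from airbyte_cdk.sources.declarative.requesters.request_options import DatetimeBasedRequestOptionsProvider

provider = DatetimeBasedRequestOptionsProvider(
    config={},
    parameters={},
    start_time_option=RequestOption(
        field_name="created[gte]", inject_into=RequestOptionType.request_parameter, parameters={}
    ),
    end_time_option=RequestOption(
        field_name="created[lte]", inject_into=RequestOptionType.request_parameter, parameters={}
    ),
)

# The slice boundaries produced by the datetime cursor land in the query string.
params = provider.get_request_params(stream_slice={"start_time": "2024-01-01", "end_time": "2024-01-31"})
assert params == {"created[gte]": "2024-01-01", "created[lte]": "2024-01-31"}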
File: airbyte_cdk/sources/declarative/requesters/request_options/default_request_options_provider.py
@@ -0,0 +1,59 @@
#
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
#

from dataclasses import InitVar, dataclass
from typing import Any, Mapping, Optional, Union

from airbyte_cdk.sources.declarative.requesters.request_options.request_options_provider import RequestOptionsProvider
from airbyte_cdk.sources.types import Config, StreamSlice, StreamState


@dataclass
class DefaultRequestOptionsProvider(RequestOptionsProvider):
"""
Request options provider that returns no request options. Serves as the default no-op provider for streams that
have no incremental sync or slicing-derived request options to inject into the outbound request.
"""

config: Config
parameters: InitVar[Mapping[str, Any]]

def __post_init__(self, parameters: Mapping[str, Any]) -> None:
pass

def get_request_params(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return {}

def get_request_headers(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return {}

def get_request_body_data(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Union[Mapping[str, Any], str]:
return {}

def get_request_body_json(
self,
*,
stream_state: Optional[StreamState] = None,
stream_slice: Optional[StreamSlice] = None,
next_page_token: Optional[Mapping[str, Any]] = None,
) -> Mapping[str, Any]:
return {}
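
A short sketch of the no-op behavior:

# The default provider contributes nothing, which lets retrievers without a
# datetime-based cursor satisfy the RequestOptionsProvider interface.
from airbyte_cdk.sources.declarative.requesters.request_options import DefaultRequestOptionsProvider

provider = DefaultRequestOptionsProvider(config={}, parameters={})
assert provider.get_request_params() == {}
assert provider.get_request_headers() == {}
assert provider.get_request_body_json() == {}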
File: airbyte_cdk/sources/declarative/retrievers/simple_retriever.py
@@ -1,6 +1,7 @@
#
# Copyright (c) 2023 Airbyte, Inc., all rights reserved.
#

import json
from dataclasses import InitVar, dataclass, field
from functools import partial
@@ -16,6 +17,7 @@
from airbyte_cdk.sources.declarative.partition_routers.single_partition_router import SinglePartitionRouter
from airbyte_cdk.sources.declarative.requesters.paginators.no_pagination import NoPagination
from airbyte_cdk.sources.declarative.requesters.paginators.paginator import Paginator
from airbyte_cdk.sources.declarative.requesters.request_options import DefaultRequestOptionsProvider, RequestOptionsProvider
from airbyte_cdk.sources.declarative.requesters.requester import Requester
from airbyte_cdk.sources.declarative.retrievers.retriever import Retriever
from airbyte_cdk.sources.declarative.stream_slicers.stream_slicer import StreamSlicer
@@ -61,6 +63,7 @@ class SimpleRetriever(Retriever):
_primary_key: str = field(init=False, repr=False, default="")
paginator: Optional[Paginator] = None
stream_slicer: StreamSlicer = field(default_factory=lambda: SinglePartitionRouter(parameters={}))
request_option_provider: Optional[RequestOptionsProvider] = None
cursor: Optional[DeclarativeCursor] = None
ignore_stream_slicer_parameters_on_paginated_requests: bool = False

@@ -71,6 +74,7 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
self._last_record: Optional[Record] = None
self._parameters = parameters
self._name = InterpolatedString(self._name, parameters=parameters) if isinstance(self._name, str) else self._name
self.request_option_provider = self.request_option_provider or DefaultRequestOptionsProvider(config=self.config, parameters={})

# This mapping is used during a resumable full refresh syncs to indicate whether a partition has started syncing
# records. Partitions serve as the key and map to True if they already began processing records
@@ -158,7 +162,7 @@ def _request_params(
stream_slice,
next_page_token,
self._paginator.get_request_params,
self.request_option_provider.get_request_params,
)
if isinstance(params, str):
raise ValueError("Request params cannot be a string")
@@ -184,7 +188,7 @@ def _request_body_data(
stream_slice,
next_page_token,
self._paginator.get_request_body_data,
self.request_option_provider.get_request_body_data,
)

def _request_body_json(
@@ -203,7 +207,7 @@ def _request_body_json(
stream_slice,
next_page_token,
self._paginator.get_request_body_json,
self.request_option_provider.get_request_body_json,
)
if isinstance(body_json, str):
raise ValueError("Request body json cannot be a string")
File: airbyte_cdk/sources/streams/concurrent/cursor.py
@@ -6,11 +6,13 @@
from typing import Any, Callable, Iterable, List, Mapping, MutableMapping, Optional, Protocol, Tuple

from airbyte_cdk.sources.connector_state_manager import ConnectorStateManager
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
from airbyte_cdk.sources.message import MessageRepository
from airbyte_cdk.sources.streams import NO_CURSOR_STATE_KEY
from airbyte_cdk.sources.streams.concurrent.partitions.partition import Partition
from airbyte_cdk.sources.streams.concurrent.partitions.record import Record
from airbyte_cdk.sources.streams.concurrent.state_converters.abstract_stream_state_converter import AbstractStreamStateConverter
from airbyte_cdk.sources.types import Config


def _extract_value(mapping: Mapping[str, Any], path: List[str]) -> Any:
@@ -58,6 +60,23 @@ def extract_value(self, record: Record) -> CursorValueType:
return cursor_value # type: ignore # we assume that the value the path points at is a comparable


Contributor (Author):
I've added this new InterpolatedCursorField so that we can perform interpolation evaluation at processing time, when we observe records, instead of at parse time, when we build the ConcurrentCursor.

I feel like this is a nice-to-have from the mental model of interpolation only being used at runtime, but the unfortunate side effect is that we leak interpolation as a concept into the concurrent CDK. I've included the code to spark discussion, but my vote is that we remove this and just perform the interpolation ahead of time when we instantiate the ConcurrentCursor.

class InterpolatedCursorField(CursorField):
def __init__(self, cursor_field_key: InterpolatedString, config: Config):
self._cursor_field_key = cursor_field_key
self.config = config

@property
def cursor_field_key(self) -> str:
return self._cursor_field_key.eval(config=self.config)

def extract_value(self, record: Record) -> CursorValueType:
resolved_cursor_field_key = self._cursor_field_key.eval(config=self.config)
cursor_value = record.data.get(resolved_cursor_field_key)
if cursor_value is None:
raise ValueError(f"Could not find cursor field {self.cursor_field_key} in record")
return cursor_value # type: ignore # we assume that the value the path points at is a comparable
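
A small sketch of the intended behavior (assumed usage, not included in the PR):

# The cursor field key is resolved against config when accessed, not at parse time.
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString

field = InterpolatedCursorField(
    cursor_field_key=InterpolatedString.create("{{ config['cursor_field'] }}", parameters={}),
    config={"cursor_field": "updated_at"},
)
assert field.cursor_field_key == "updated_at"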


class Cursor(ABC):
@property
@abstractmethod
@@ -143,6 +162,7 @@ def __init__(
end_provider: Callable[[], CursorValueType],
lookback_window: Optional[GapType] = None,
slice_range: Optional[GapType] = None,
cursor_granularity: Optional[GapType] = None,
Contributor (Author):
cursor_granularity is a part of low-code cursors, but not concurrent ones. The main lever it turns is reducing record duplicates from date window edges that overlap.

Our overall stance has been that some duplicates over the course of a sync are tolerable, but we should never lose records. After auditing our existing repository connectors, we have 14 cases where we use "P1D" or greater as the granularity. I would consider that a potentially unacceptable number of duplicates, and it affects some of our more highly used connectors, hence why I've added this in.

Contributor:
thanks for checking how often the feature is used

Contributor:
I like the change of supporting granularity here.

On the topic of duplication and big granularities, isn't it driven by the API granularity? For example, if an API only takes the format YYYY-MM as an input, I don't see how we can have anything other than P1M as the cursor granularity.

Contributor (Author):
At the largest sizes we do start to lose some options, although I don't think month or year granularity is that common; I'm not sure I've ever seen it. Bad API design, in my opinion. There's an interesting question of inference here: a user whose datetimes are hour-granular would almost certainly want hour granularity. But that might be more a matter of the low-code interface than of the underlying cursor, which would still need cursor_granularity to be configurable either way.
) -> None:
self._stream_name = stream_name
self._stream_namespace = stream_namespace
Expand All @@ -159,6 +179,7 @@ def __init__(
self.start, self._concurrent_state = self._get_concurrent_state(stream_state)
self._lookback_window = lookback_window
self._slice_range = slice_range
self._cursor_granularity = cursor_granularity
Contributor:
nit: We might have (a very small piece of) logic that is duplicated. My understanding is that this value is basically defined as part of the implementation of AbstractStreamStateConverter.increment. Is this fair? If so, would there be a way to avoid duplicating this logic?

Contributor (Author):
The usage of DateTimeStreamStateConverter.increment() might not actually be duplicated, but it is an area I had not originally accounted for, so in that regard you found a bug I need to fix! The two usages of the method:

  • _compare_interval(): which is, interestingly, completely unused in our code
  • merge_intervals(): I need to fix the increment method so that DateTimeStreamStateConverter takes in an optional cursor_granularity parameter, and we should use that in place of timedelta(milliseconds=1). Otherwise we won't properly merge partitions together, since they won't line up correctly.

I'll adjust the code accordingly and add a test case for this!

@property
def state(self) -> MutableMapping[str, Any]:
@@ -312,7 +333,10 @@ def _split_per_slice_range(self, lower: CursorValueType, upper: CursorValueType)
current_lower_boundary = lower
while not stop_processing:
current_upper_boundary = min(current_lower_boundary + self._slice_range, upper)
if self._cursor_granularity:
yield current_lower_boundary, current_upper_boundary - self._cursor_granularity
else:
yield current_lower_boundary, current_upper_boundary
current_lower_boundary = current_upper_boundary
if current_upper_boundary >= upper:
stop_processing = True
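
A standalone trace of the new slicing behavior (a simplified re-implementation with assumed datetime values, not the CDK method itself):

# Simplified re-implementation of _split_per_slice_range for illustration only.
from datetime import datetime, timedelta

def split(lower, upper, slice_range, cursor_granularity=None):
    current_lower = lower
    while True:
        current_upper = min(current_lower + slice_range, upper)
        if cursor_granularity:
            yield current_lower, current_upper - cursor_granularity
        else:
            yield current_lower, current_upper
        current_lower = current_upper
        if current_upper >= upper:
            break

start, end = datetime(2024, 1, 1), datetime(2024, 1, 11)

# Without granularity, consecutive windows share an edge, so a record stamped
# exactly at Jan 6 can be picked up by both slices:
print(list(split(start, end, timedelta(days=5))))
# -> [(Jan 1, Jan 6), (Jan 6, Jan 11)]

# With a one-day cursor_granularity (P1D), the shared edges disappear:
print(list(split(start, end, timedelta(days=5), timedelta(days=1))))
# -> [(Jan 1, Jan 5), (Jan 6, Jan 10)]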