airbytehq
diff --git a/‎airbyte-integrations/connectors/source-hubspot/metadata.yaml
Lines changed: 1 addition & 1 deletion b/‎airbyte-integrations/connectors/source-hubspot/metadata.yaml
Lines changed: 1 addition & 1 deletion
diff --git a/‎airbyte-integrations/connectors/source-hubspot/pyproject.toml
Lines changed: 1 addition & 1 deletion b/‎airbyte-integrations/connectors/source-hubspot/pyproject.toml
Lines changed: 1 addition & 1 deletion
diff --git a/‎airbyte-integrations/connectors/source-hubspot/source_hubspot/components.py
Lines changed: 97 additions & 2 deletions b/‎airbyte-integrations/connectors/source-hubspot/source_hubspot/components.py
Lines changed: 97 additions & 2 deletions
@@ -10,7 +10,7 @@ data:
   connectorSubtype: api
   connectorType: source
   definitionId: 36c891d9-4bd9-43ac-bad2-10e12756272c
-  dockerImageTag: 4.11.0
+  dockerImageTag: 4.12.0
   dockerRepository: airbyte/source-hubspot
   documentationUrl: https://docs.airbyte.com/integrations/sources/hubspot
   erdUrl: https://dbdocs.io/airbyteio/source-hubspot?view=relationships
 
@@ -3,7 +3,7 @@ requires = [ "poetry-core>=1.0.0",]
 build-backend = "poetry.core.masonry.api"
 
 [tool.poetry]
-version = "4.11.0"
+version = "4.12.0"
 name = "source-hubspot"
 description = "Source implementation for HubSpot."
 authors = [ "Airbyte <contact@airbyte.io>",]
 
@@ -3,19 +3,23 @@
 #
 
 from dataclasses import InitVar, dataclass, field
-from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
+from datetime import timedelta
+from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union
 
 import dpath
 import requests
 
+from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
 from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
 from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
 from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
 from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
 from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
+from airbyte_cdk.sources.declarative.requesters import HttpRequester
 from airbyte_cdk.sources.declarative.requesters.requester import Requester
 from airbyte_cdk.sources.declarative.transformations import RecordTransformation
 from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_now, ab_datetime_parse
 
 
 class NewtoLegacyFieldTransformation(RecordTransformation):
@@ -64,14 +68,21 @@ def transform(
 class MigrateEmptyStringState(StateMigration):
     cursor_field: str
     config: Config
+    cursor_format: Optional[str] = None
 
-    def __init__(self, cursor_field, config: Config):
+    def __init__(self, cursor_field, config: Config, cursor_format: Optional[str] = None):
+        # cursor_field: key under which `migrate` writes the cursor value in the returned state.
+        # config: connector config; `migrate` reads its optional "start_date" entry.
+        # cursor_format: optional datetime format; when set, `migrate` re-formats the start
+        # date into this format, otherwise the start date string is used unchanged.
         self.cursor_field = cursor_field
+        self.cursor_format = cursor_format
         self.config = config
 
     def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
+        # Builds a replacement state holding only the cursor field, set to the configured
+        # start date. The incoming `stream_state` is not read here.
         # if start date wasn't provided in the config default date will be used
         start_date = self.config.get("start_date", "2006-06-01T00:00:00.000Z")
+        # When a cursor format is configured, parse the start date and re-emit it in that
+        # format so the state value matches the format the cursor is declared with.
+        if self.cursor_format:
+            dt = ab_datetime_parse(start_date)
+            formatted_start_date = DatetimeParser().format(dt, self.cursor_format)
+            return {self.cursor_field: formatted_start_date}
+
+        # No cursor format configured: store the start date string exactly as given.
         return {self.cursor_field: start_date}
 
     def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
@@ -167,3 +178,87 @@ def transform(
 
         for data in additional_data:
             record.update(data)
+
+
+class EngagementsHttpRequester(HttpRequester):
+    """
+    Requester for the Engagements stream that picks one of two HubSpot endpoints:
+    - Engagements Recent, if start_date/state is less than 30 days old and the API is able to return all records (<=10k), or
+    - Engagements All, which extracts all records and relies on filtering on the connector side.
+
+    Recent Engagements API:
+    https://legacydocs.hubspot.com/docs/methods/engagements/get-recent-engagements
+
+    Important: This endpoint returns only the 10k most recently updated records from the last 30 days.
+
+    All Engagements API:
+    https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements
+
+    Important:
+
+    1. The stream is declared to use one stream slice from the start date (default/config/state) to time.now(). It doesn't have a step.
+    Based on this we can use stream_slice["start_time"] and be sure that it is equal to the value in the initial state.
+    stream_slice["start_time"] is used to decide _use_recent_api; concurrent processing of date windows is incompatible with
+    this, which is why a step is not supported.
+    2. The stream is declared to use 250 as the page size param in pagination.
+    The Recent Engagements API has 100 as its max page size but doesn't fail if a bigger value is provided; it falls back to 100.
+    3. The stream has is_client_side_incremental=true to filter the Engagements All response.
+    """
+
+    recent_api_total_records_limit = 10000
+    # One day below the 30-day window described above (presumably a safety margin; confirm with the author).
+    recent_api_last_days_limit = 29
+
+    recent_api_path = "/engagements/v1/engagements/recent/modified"
+    all_api_path = "/engagements/v1/engagements/paged"
+
+    # Cached endpoint decision: None until the first call to should_use_recent_api, then True/False.
+    _use_recent_api = None
+
+    def should_use_recent_api(self, stream_slice: StreamSlice) -> bool:
+        """
+        Decide (once) whether the Recent Engagements endpoint can serve this sync.
+
+        The decision is cached on the instance, so the probe request below is sent at most once
+        regardless of the outcome.
+
+        :param stream_slice: the single stream slice; its "start_time" is read as epoch milliseconds
+        :return: True if the Recent Engagements endpoint should be used, False for the All Engagements endpoint
+        """
+        if self._use_recent_api is not None:
+            return self._use_recent_api
+
+        # Recent engagements API returns records updated in the last 30 days only. If start time is older All engagements API should be used
+        oldest_supported_start = int(
+            DatetimeParser().format((ab_datetime_now() - timedelta(days=self.recent_api_last_days_limit)), "%ms")
+        )
+        if int(stream_slice["start_time"]) < oldest_supported_start:
+            self._use_recent_api = False
+            return self._use_recent_api
+
+        # Recent engagements API returns only 10k most recently updated records.
+        # If the API response indicates that there are more records, All engagements API should be used
+        _, response = self._http_client.send_request(
+            http_method=self.get_method().value,
+            url=self._join_url(self.get_url_base(), self.recent_api_path),
+            headers=self._request_headers({}, stream_slice, {}, {}),
+            params={"count": 250, "since": stream_slice["start_time"]},
+            request_kwargs={"stream": self.stream_response},
+        )
+        total = response.json().get("total")
+        # Always store an explicit bool so a "too many records" answer is cached as well;
+        # leaving the flag unset would re-send this probe on every get_path/get_request_params call.
+        # A missing "total" falls back to the All engagements API, which returns every record.
+        self._use_recent_api = total is not None and total <= self.recent_api_total_records_limit
+        return self._use_recent_api
+
+    def get_path(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> str:
+        """Return the Recent Engagements path when it can serve this slice, otherwise the All Engagements path."""
+        if self.should_use_recent_api(stream_slice):
+            return self.recent_api_path
+        return self.all_api_path
+
+    def get_request_params(
+        self,
+        *,
+        stream_state: Optional[StreamState] = None,
+        stream_slice: Optional[StreamSlice] = None,
+        next_page_token: Optional[Mapping[str, Any]] = None,
+    ) -> MutableMapping[str, Any]:
+        """Return the request params from the options provider, plus "since" when the Recent Engagements endpoint is used."""
+        request_params = self._request_options_provider.get_request_params(
+            stream_state=stream_state,
+            stream_slice=stream_slice,
+            next_page_token=next_page_token,
+        )
+        if self.should_use_recent_api(stream_slice):
+            # Only the Recent endpoint is sent "since"; the All endpoint is filtered on the connector side.
+            request_params["since"] = stream_slice["start_time"]
+        return request_params