|
3 | 3 | #
|
4 | 4 |
|
5 | 5 | from dataclasses import InitVar, dataclass, field
|
6 |
| -from typing import Any, Dict, Iterable, List, Mapping, Optional, Union |
| 6 | +from datetime import timedelta |
| 7 | +from typing import Any, Dict, Iterable, List, Mapping, MutableMapping, Optional, Union |
7 | 8 |
|
8 | 9 | import dpath
|
9 | 10 | import requests
|
10 | 11 |
|
| 12 | +from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser |
11 | 13 | from airbyte_cdk.sources.declarative.decoders import Decoder, JsonDecoder
|
12 | 14 | from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
|
13 | 15 | from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
|
14 | 16 | from airbyte_cdk.sources.declarative.interpolation import InterpolatedString
|
15 | 17 | from airbyte_cdk.sources.declarative.migrations.state_migration import StateMigration
|
| 18 | +from airbyte_cdk.sources.declarative.requesters import HttpRequester |
16 | 19 | from airbyte_cdk.sources.declarative.requesters.requester import Requester
|
17 | 20 | from airbyte_cdk.sources.declarative.transformations import RecordTransformation
|
18 | 21 | from airbyte_cdk.sources.types import Config, StreamSlice, StreamState
|
| 22 | +from airbyte_cdk.utils.datetime_helpers import ab_datetime_now, ab_datetime_parse |
19 | 23 |
|
20 | 24 |
|
21 | 25 | class NewtoLegacyFieldTransformation(RecordTransformation):
|
@@ -64,14 +68,21 @@ def transform(
|
64 | 68 | class MigrateEmptyStringState(StateMigration):
|
65 | 69 | cursor_field: str
|
66 | 70 | config: Config
|
| 71 | + cursor_format: Optional[str] = None |
67 | 72 |
|
68 |
| - def __init__(self, cursor_field, config: Config): |
def __init__(self, cursor_field, config: Config, cursor_format: Optional[str] = None):
    """
    Store the settings used by this state migration.

    :param cursor_field: name of the state key that `migrate` populates
    :param config: connector configuration; `migrate` reads "start_date" from it
    :param cursor_format: optional datetime format applied to the start date by `migrate`;
        when omitted, the start date is written to the state unchanged
    """
    self.config = config
    self.cursor_format = cursor_format
    self.cursor_field = cursor_field
|
71 | 77 |
|
def migrate(self, stream_state: Mapping[str, Any]) -> Mapping[str, Any]:
    """
    Build a replacement state whose cursor value is the configured start date.

    The incoming `stream_state` is not read; the returned mapping contains only the cursor field.

    :param stream_state: the state being migrated (unused)
    :return: a mapping with the cursor field set to the start date, rendered with
        `cursor_format` when one was provided, otherwise left exactly as configured
    """
    # Fall back to the default date when the config does not provide a start date.
    initial_value = self.config.get("start_date", "2006-06-01T00:00:00.000Z")

    if not self.cursor_format:
        return {self.cursor_field: initial_value}

    parsed_start = ab_datetime_parse(initial_value)
    return {self.cursor_field: DatetimeParser().format(parsed_start, self.cursor_format)}
|
76 | 87 |
|
77 | 88 | def should_migrate(self, stream_state: Mapping[str, Any]) -> bool:
|
@@ -167,3 +178,87 @@ def transform(
|
167 | 178 |
|
168 | 179 | for data in additional_data:
|
169 | 180 | record.update(data)
|
| 181 | + |
| 182 | + |
class EngagementsHttpRequester(HttpRequester):
    """
    Requester for the Engagements stream, which switches between two endpoints:
      - Engagements Recent, if the start date/state is less than 30 days old and the API is able to return
        all records (<10k), or
      - Engagements All, which extracts all records and relies on filtering on the connector side.

    Recent Engagements API:
      https://legacydocs.hubspot.com/docs/methods/engagements/get-recent-engagements

      Important: this endpoint returns only the 10k most recently updated records from the last 30 days.

    All Engagements API:
      https://legacydocs.hubspot.com/docs/methods/engagements/get-all-engagements

    Important:

      1. The stream is declared to use a single stream slice from the start date (default/config/state) to
         time.now(); it has no step. Because of that, stream_slice["start_time"] is equal to the value in the
         initial state. stream_slice["start_time"] is used to decide `_use_recent_api`, so concurrent processing
         of date windows is incompatible with this requester and a step is not supported.
      2. The stream is declared to use 250 as the page size param in pagination.
         The Recent Engagements API has a max of 100 but doesn't fail if a bigger value is provided; it falls
         back to 100.
      3. The stream has is_client_side_incremental=true to filter the Engagements All response.
    """

    recent_api_total_records_limit = 10000
    recent_api_last_days_limit = 29

    recent_api_path = "/engagements/v1/engagements/recent/modified"
    all_api_path = "/engagements/v1/engagements/paged"

    # Cached endpoint decision: None until the first call to should_use_recent_api, then always a bool.
    _use_recent_api = None

    def should_use_recent_api(self, stream_slice: StreamSlice) -> bool:
        """
        Decide (once per requester instance) whether the Recent Engagements endpoint can be used.

        :param stream_slice: the current slice; its "start_time" must be a millisecond timestamp
        :return: True when the slice starts within the supported recent window and the probe request reports
            no more than `recent_api_total_records_limit` records, False otherwise
        """
        if self._use_recent_api is not None:
            return self._use_recent_api

        # Recent engagements API returns records updated in the last 30 days only.
        # If the start time is older, the All engagements API should be used.
        oldest_supported_start = int(
            DatetimeParser().format(ab_datetime_now() - timedelta(days=self.recent_api_last_days_limit), "%ms")
        )
        if int(stream_slice["start_time"]) < oldest_supported_start:
            self._use_recent_api = False
            return self._use_recent_api

        # Recent engagements API returns only the 10k most recently updated records.
        # If the probe response indicates that there are more records, the All engagements API should be used.
        _, response = self._http_client.send_request(
            http_method=self.get_method().value,
            url=self._join_url(self.get_url_base(), self.recent_api_path),
            headers=self._request_headers({}, stream_slice, {}, {}),
            params={"count": 250, "since": stream_slice["start_time"]},
            request_kwargs={"stream": self.stream_response},
        )
        total = response.json().get("total")

        # Always store a bool so the decision is cached and the probe request is never repeated.
        # A response without "total" cannot confirm completeness, so fall back to the All engagements API.
        self._use_recent_api = total is not None and total <= self.recent_api_total_records_limit
        return self._use_recent_api

    def get_path(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> str:
        """Return the Recent Engagements path when it can serve the slice, otherwise the All Engagements path."""
        if self.should_use_recent_api(stream_slice):
            return self.recent_api_path
        return self.all_api_path

    def get_request_params(
        self,
        *,
        stream_state: Optional[StreamState] = None,
        stream_slice: Optional[StreamSlice] = None,
        next_page_token: Optional[Mapping[str, Any]] = None,
    ) -> MutableMapping[str, Any]:
        """
        Return the params from the request options provider, adding "since" for the Recent Engagements endpoint.

        "since" is set to stream_slice["start_time"] only when the Recent Engagements endpoint is used.
        """
        request_params = self._request_options_provider.get_request_params(
            stream_state=stream_state,
            stream_slice=stream_slice,
            next_page_token=next_page_token,
        )
        if self.should_use_recent_api(stream_slice):
            request_params.update({"since": stream_slice["start_time"]})
        return request_params
0 commit comments