Skip to content

Commit 9507d56

Browse files
authored
low-code connectors: fix parse and format methods (#15326)
* fix parse and format methods * define constant * remove timestamp magic keyword * comment * test for ci * uncomment test * use timestamp() * Bump cdk version * bump to 0.1.72
1 parent fd0b769 commit 9507d56

File tree

5 files changed

+80
-22
lines changed

5 files changed

+80
-22
lines changed

airbyte-cdk/python/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 0.1.72
4+
- Bugfix: Fix bug in DatetimeStreamSlicer's parsing method
5+
36
## 0.1.71
47
- Refactor declarative package to dataclasses
58
- Bugfix: Requester header always converted to string

airbyte-cdk/python/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@ class MinMaxDatetime(JsonSchemaMixin):
1717
min_date, then min_date is returned. If date is greater than max_date, then max_date is returned.
1818
If neither, the input date is returned.
1919
20+
The timestamp format accepts the same format codes as datetime.strptime, which are
21+
all the format codes required by the 1989 C standard.
22+
Full list of accepted format codes: https://man7.org/linux/man-pages/man3/strftime.3.html
23+
2024
Attributes:
2125
datetime (Union[InterpolatedString, str]): InterpolatedString or string representing the datetime in the format specified by `datetime_format`
2226
datetime_format (str): Format of the datetime passed as argument

airbyte-cdk/python/airbyte_cdk/sources/declarative/stream_slicers/datetime_stream_slicer.py

Lines changed: 16 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
import datetime
66
import re
77
from dataclasses import InitVar, dataclass, field
8-
from typing import Any, Iterable, Mapping, Optional
8+
from typing import Any, Iterable, Mapping, Optional, Union
99

10-
import dateutil
1110
from airbyte_cdk.models import SyncMode
1211
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
1312
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -35,6 +34,10 @@ class DatetimeStreamSlicer(StreamSlicer, JsonSchemaMixin):
3534
3635
For example, "1d" will produce windows of 1 day, and "2weeks" windows of 2 weeks.
3736
37+
The timestamp format accepts the same format codes as datetime.strptime, which are
38+
all the format codes required by the 1989 C standard.
39+
Full list of accepted format codes: https://man7.org/linux/man-pages/man3/strftime.3.html
40+
3841
Attributes:
3942
start_datetime (MinMaxDatetime): the datetime that determines the earliest record that should be synced
4043
end_datetime (MinMaxDatetime): the datetime that determines the last record that should be synced
@@ -128,7 +131,7 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) ->
128131
"""
129132
stream_state = stream_state or {}
130133
kwargs = {"stream_state": stream_state}
131-
end_datetime = min(self.end_datetime.get_datetime(self.config, **kwargs), datetime.datetime.now(tz=datetime.timezone.utc))
134+
end_datetime = min(self.end_datetime.get_datetime(self.config, **kwargs), datetime.datetime.now(tz=self._timezone))
132135
lookback_delta = self._parse_timedelta(self.lookback_window.eval(self.config, **kwargs) if self.lookback_window else "0d")
133136
start_datetime = self.start_datetime.get_datetime(self.config, **kwargs) - lookback_delta
134137
start_datetime = min(start_datetime, end_datetime)
@@ -148,8 +151,11 @@ def stream_slices(self, sync_mode: SyncMode, stream_state: Mapping[str, Any]) ->
148151
return dates
149152

150153
def _format_datetime(self, dt: datetime.datetime):
151-
if self.datetime_format == "timestamp":
152-
return dt.timestamp()
154+
# strftime("%s") is unreliable because it ignores the time zone information and assumes the time zone of the system it's running on
155+
# It's safer to use the timestamp() method than the %s directive
156+
# See https://stackoverflow.com/a/4974930
157+
if self.datetime_format == "%s":
158+
return str(int(dt.timestamp()))
153159
else:
154160
return dt.strftime(self.datetime_format)
155161

@@ -167,22 +173,11 @@ def _get_date(self, cursor_value, default_date: datetime.datetime, comparator) -
167173
cursor_date = self.parse_date(cursor_value or default_date)
168174
return comparator(cursor_date, default_date)
169175

170-
def parse_date(self, date: Any) -> datetime:
171-
if date and isinstance(date, str):
172-
if self.is_int(date):
173-
return datetime.datetime.fromtimestamp(int(date)).replace(tzinfo=self._timezone)
174-
else:
175-
return dateutil.parser.parse(date).replace(tzinfo=self._timezone)
176-
elif isinstance(date, int):
177-
return datetime.datetime.fromtimestamp(int(date)).replace(tzinfo=self._timezone)
178-
return date
179-
180-
def is_int(self, s) -> bool:
181-
try:
182-
int(s)
183-
return True
184-
except ValueError:
185-
return False
176+
def parse_date(self, date: Union[str, datetime.datetime]) -> datetime.datetime:
177+
if isinstance(date, str):
178+
return datetime.datetime.strptime(str(date), self.datetime_format).replace(tzinfo=self._timezone)
179+
else:
180+
return date
186181

187182
@classmethod
188183
def _parse_timedelta(cls, time_str):

airbyte-cdk/python/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setup(
1717
name="airbyte-cdk",
18-
version="0.1.71",
18+
version="0.1.72",
1919
description="A framework for writing Airbyte Connectors.",
2020
long_description=README,
2121
long_description_content_type="text/markdown",

airbyte-cdk/python/unit_tests/sources/declarative/stream_slicers/test_datetime_stream_slicer.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -445,5 +445,61 @@ def test_request_option(test_name, inject_into, field_name, expected_req_params,
445445
assert expected_body_data == slicer.get_request_body_data(stream_slice=stream_slice)
446446

447447

448+
@pytest.mark.parametrize(
449+
"test_name, input_date, date_format, expected_output_date",
450+
[
451+
(
452+
"test_parse_date_iso",
453+
"2021-01-01T00:00:00.000000+0000",
454+
"%Y-%m-%dT%H:%M:%S.%f%z",
455+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
456+
),
457+
("test_parse_date_number", "20210101", "%Y%m%d", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)),
458+
(
459+
"test_parse_date_datetime",
460+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
461+
"%Y%m%d",
462+
datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
463+
),
464+
],
465+
)
466+
def test_parse_date(test_name, input_date, date_format, expected_output_date):
467+
slicer = DatetimeStreamSlicer(
468+
start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000", options={}),
469+
end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000", options={}),
470+
step="1d",
471+
cursor_field=InterpolatedString(cursor_field, options={}),
472+
datetime_format=date_format,
473+
lookback_window=InterpolatedString("0d", options={}),
474+
config=config,
475+
options={},
476+
)
477+
output_date = slicer.parse_date(input_date)
478+
assert expected_output_date == output_date
479+
480+
481+
@pytest.mark.parametrize(
482+
"test_name, input_dt, datetimeformat, expected_output",
483+
[
484+
("test_format_timestamp", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%s", "1609459200"),
485+
("test_format_string", datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc), "%Y-%m-%d", "2021-01-01"),
486+
],
487+
)
488+
def test_format_datetime(test_name, input_dt, datetimeformat, expected_output):
489+
slicer = DatetimeStreamSlicer(
490+
start_datetime=MinMaxDatetime("2021-01-01T00:00:00.000000+0000", options={}),
491+
end_datetime=MinMaxDatetime("2021-01-10T00:00:00.000000+0000", options={}),
492+
step="1d",
493+
cursor_field=InterpolatedString(cursor_field, options={}),
494+
datetime_format=datetimeformat,
495+
lookback_window=InterpolatedString("0d", options={}),
496+
config=config,
497+
options={},
498+
)
499+
500+
output_date = slicer._format_datetime(input_dt)
501+
assert expected_output == output_date
502+
503+
448504
if __name__ == "__main__":
449505
unittest.main()

0 commit comments

Comments
 (0)