Skip to content

Commit 6332fd6

Browse files
girardasherifnada
andauthored
[low-code-connectors] Replace JelloExtractor with DpathExtractor (#15514)
* Handle extracting no records from root * handle missing keys * record extractor interface * dpath extractor * docstring * handle extract root array * Update airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py Co-authored-by: Sherif A. Nada <[email protected]> * Update airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py Co-authored-by: Sherif A. Nada <[email protected]> * update docstring * respect extractor interface * edge case handling * document * use dpath by default * delete jello extractor * bump cdk version * delete jello dependency * Update reference docs templates * update template Co-authored-by: Sherif A. Nada <[email protected]>
1 parent 8c30067 commit 6332fd6

21 files changed

+240
-156
lines changed

airbyte-cdk/python/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 0.1.74
4+
- Replace JelloExtractor with DpathExtractor
5+
36
## 0.1.73
47
- Bugfix: Fix bug in DatetimeStreamSlicer's parsing method
58

airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@
22
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
33
#
44

5+
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
56
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
6-
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
77
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
88
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
99

10-
__all__ = ["HttpSelector", "JelloExtractor", "RecordFilter", "RecordSelector"]
10+
__all__ = ["HttpSelector", "DpathExtractor", "RecordFilter", "RecordSelector"]
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
#
2+
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from dataclasses import InitVar, dataclass
6+
from typing import Any, List, Mapping, Union
7+
8+
import dpath.util
9+
import requests
10+
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
11+
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
12+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
13+
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
14+
from airbyte_cdk.sources.declarative.types import Config, Record
15+
from dataclasses_jsonschema import JsonSchemaMixin
16+
17+
18+
@dataclass
class DpathExtractor(RecordExtractor, JsonSchemaMixin):
    """
    Record extractor that searches a decoded response over a path defined as an array of fields.

    If the field pointer is empty, the whole decoded response is used as the extracted value.
    If the field pointer points to an array, that array is returned.
    If the field pointer points to an object, that object is returned wrapped as an array.
    If the field pointer points to an empty object (or any other falsy non-array value), an empty array is returned.
    If the field pointer points to a non-existing path, an empty array is returned.

    Examples of instantiating this extractor:
    ```
      extractor:
        type: DpathExtractor
        field_pointer:
          - "root"
          - "data"
    ```

    ```
      extractor:
        type: DpathExtractor
        field_pointer:
          - "root"
          - "{{ options['field'] }}"
    ```

    ```
      extractor:
        type: DpathExtractor
        field_pointer: []
    ```

    Attributes:
        field_pointer (List[Union[InterpolatedString, str]]): Path to the field that should be extracted, one entry per path segment
        config (Config): The user-provided configuration as specified by the source's spec
        options (Mapping[str, Any]): Init-only parameters forwarded to InterpolatedString.create for every plain-string path segment
        decoder (Decoder): The decoder responsible for transforming the response into a Mapping
    """

    field_pointer: List[Union[InterpolatedString, str]]
    config: Config
    options: InitVar[Mapping[str, Any]]
    # NOTE(review): this default is a single JsonDecoder instance shared by every DpathExtractor that does not
    # receive its own decoder. If JsonDecoder is an unhashable dataclass, newer Python versions may reject such a
    # default; consider dataclasses.field(default_factory=...) -- confirm against JsonDecoder's definition.
    decoder: Decoder = JsonDecoder(options={})

    def __post_init__(self, options: Mapping[str, Any]):
        """
        Wrap every plain-string path segment in an InterpolatedString so that all segments can be evaluated uniformly.

        :param options: Init-only parameters forwarded to InterpolatedString.create
        """
        # Build a new list instead of assigning by index so the list object supplied by the caller is not mutated.
        self.field_pointer = [
            InterpolatedString.create(segment, options=options) if isinstance(segment, str) else segment
            for segment in self.field_pointer
        ]

    def extract_records(self, response: requests.Response) -> List[Record]:
        """
        Decode the response and return the records found at the configured field pointer.

        :param response: The response to extract the records from
        :return: List of Records extracted from the response; empty when nothing is found at the path
        """
        response_body = self.decoder.decode(response)
        if not self.field_pointer:
            # An empty pointer selects the root of the decoded response.
            extracted = response_body
        else:
            path = [segment.eval(self.config) for segment in self.field_pointer]
            # default=[] turns a missing path into "no records" rather than an error.
            extracted = dpath.util.get(response_body, path, default=[])
        if isinstance(extracted, list):
            return extracted
        # A non-list value is treated as a single record; falsy values (e.g. an empty object) yield no records.
        return [extracted] if extracted else []

airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/jello.py

Lines changed: 0 additions & 43 deletions
This file was deleted.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#
2+
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3+
#
4+
5+
from abc import ABC, abstractmethod
6+
from dataclasses import dataclass
7+
from typing import List
8+
9+
import requests
10+
from airbyte_cdk.sources.declarative.types import Record
11+
12+
13+
@dataclass
class RecordExtractor(ABC):
    """
    Interface for components that turn an HTTP response into a list of records.
    """

    @abstractmethod
    def extract_records(self, response: requests.Response) -> List[Record]:
        """
        Extract the records contained in the response

        :param response: The HTTP response to read the records from
        :return: The list of Records found in the response
        """

airbyte-cdk/python/airbyte_cdk/sources/declarative/extractors/record_selector.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77

88
import requests
99
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
10-
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
10+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
1111
from airbyte_cdk.sources.declarative.extractors.record_filter import RecordFilter
1212
from airbyte_cdk.sources.declarative.types import Record, StreamSlice, StreamState
1313
from dataclasses_jsonschema import JsonSchemaMixin
@@ -20,11 +20,11 @@ class RecordSelector(HttpSelector, JsonSchemaMixin):
2020
records based on a heuristic.
2121
2222
Attributes:
23-
extractor (JelloExtractor): The record extractor responsible for extracting records from a response
23+
extractor (RecordExtractor): The record extractor responsible for extracting records from a response
2424
record_filter (RecordFilter): The record filter responsible for filtering extracted records
2525
"""
2626

27-
extractor: JelloExtractor
27+
extractor: RecordExtractor
2828
options: InitVar[Mapping[str, Any]]
2929
record_filter: RecordFilter = None
3030

@@ -39,9 +39,6 @@ def select_records(
3939
next_page_token: Optional[Mapping[str, Any]] = None,
4040
) -> List[Record]:
4141
all_records = self.extractor.extract_records(response)
42-
# Some APIs don't wrap single records in a list
43-
if not isinstance(all_records, list):
44-
all_records = [all_records]
4542
if self.record_filter:
4643
return self.record_filter.filter_records(
4744
all_records, stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token

airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/class_types_registry.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from airbyte_cdk.sources.declarative.auth.token import ApiKeyAuthenticator, BasicHttpAuthenticator, BearerAuthenticator
88
from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
99
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
10-
from airbyte_cdk.sources.declarative.extractors.jello import JelloExtractor
10+
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
1111
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
1212
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
1313
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -46,11 +46,11 @@
4646
"DatetimeStreamSlicer": DatetimeStreamSlicer,
4747
"DeclarativeStream": DeclarativeStream,
4848
"DefaultErrorHandler": DefaultErrorHandler,
49+
"DpathExtractor": DpathExtractor,
4950
"ExponentialBackoffStrategy": ExponentialBackoffStrategy,
5051
"HttpRequester": HttpRequester,
5152
"InterpolatedBoolean": InterpolatedBoolean,
5253
"InterpolatedString": InterpolatedString,
53-
"JelloExtractor": JelloExtractor,
5454
"JsonSchema": JsonSchema,
5555
"LimitPaginator": LimitPaginator,
5656
"ListStreamSlicer": ListStreamSlicer,

airbyte-cdk/python/airbyte_cdk/sources/declarative/parsers/default_implementation_registry.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,9 @@
1010
from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
1111
from airbyte_cdk.sources.declarative.decoders.decoder import Decoder
1212
from airbyte_cdk.sources.declarative.decoders.json_decoder import JsonDecoder
13+
from airbyte_cdk.sources.declarative.extractors.dpath_extractor import DpathExtractor
1314
from airbyte_cdk.sources.declarative.extractors.http_selector import HttpSelector
15+
from airbyte_cdk.sources.declarative.extractors.record_extractor import RecordExtractor
1416
from airbyte_cdk.sources.declarative.extractors.record_selector import RecordSelector
1517
from airbyte_cdk.sources.declarative.interpolation.interpolated_boolean import InterpolatedBoolean
1618
from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
@@ -50,11 +52,12 @@
5052
InterpolatedString: InterpolatedString,
5153
MinMaxDatetime: MinMaxDatetime,
5254
Paginator: NoPagination,
55+
ParentStreamConfig: ParentStreamConfig,
56+
RecordExtractor: DpathExtractor,
5357
RequestOption: RequestOption,
5458
RequestOptionsProvider: InterpolatedRequestOptionsProvider,
5559
Requester: HttpRequester,
5660
Retriever: SimpleRetriever,
57-
ParentStreamConfig: ParentStreamConfig,
5861
SchemaLoader: JsonSchema,
5962
Stream: DeclarativeStream,
6063
StreamSlicer: SingleSlice,

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.datetime.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
Submodules
33
----------
44

5+
airbyte\_cdk.sources.declarative.datetime.datetime\_parser module
6+
-----------------------------------------------------------------
7+
8+
.. automodule:: airbyte_cdk.sources.declarative.datetime.datetime_parser
9+
:members:
10+
:undoc-members:
11+
:show-inheritance:
12+
513
airbyte\_cdk.sources.declarative.datetime.min\_max\_datetime module
614
-------------------------------------------------------------------
715

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.extractors.rst

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
Submodules
33
----------
44

5+
airbyte\_cdk.sources.declarative.extractors.dpath\_extractor module
6+
-------------------------------------------------------------------
7+
8+
.. automodule:: airbyte_cdk.sources.declarative.extractors.dpath_extractor
9+
:members:
10+
:undoc-members:
11+
:show-inheritance:
12+
513
airbyte\_cdk.sources.declarative.extractors.http\_selector module
614
-----------------------------------------------------------------
715

@@ -10,10 +18,10 @@ airbyte\_cdk.sources.declarative.extractors.http\_selector module
1018
:undoc-members:
1119
:show-inheritance:
1220

13-
airbyte\_cdk.sources.declarative.extractors.jello module
14-
--------------------------------------------------------
21+
airbyte\_cdk.sources.declarative.extractors.record\_extractor module
22+
--------------------------------------------------------------------
1523

16-
.. automodule:: airbyte_cdk.sources.declarative.extractors.jello
24+
.. automodule:: airbyte_cdk.sources.declarative.extractors.record_extractor
1725
:members:
1826
:undoc-members:
1927
:show-inheritance:

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.requesters.paginators.rst

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,6 @@ airbyte\_cdk.sources.declarative.requesters.paginators.no\_pagination module
2626
:undoc-members:
2727
:show-inheritance:
2828

29-
airbyte\_cdk.sources.declarative.requesters.paginators.pagination\_strategy module
30-
----------------------------------------------------------------------------------
31-
32-
.. automodule:: airbyte_cdk.sources.declarative.requesters.paginators.pagination_strategy
33-
:members:
34-
:undoc-members:
35-
:show-inheritance:
36-
3729
airbyte\_cdk.sources.declarative.requesters.paginators.paginator module
3830
-----------------------------------------------------------------------
3931

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.requesters.paginators.strategies.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,14 @@ airbyte\_cdk.sources.declarative.requesters.paginators.strategies.page\_incremen
2626
:undoc-members:
2727
:show-inheritance:
2828

29+
airbyte\_cdk.sources.declarative.requesters.paginators.strategies.pagination\_strategy module
30+
---------------------------------------------------------------------------------------------
31+
32+
.. automodule:: airbyte_cdk.sources.declarative.requesters.paginators.strategies.pagination_strategy
33+
:members:
34+
:undoc-members:
35+
:show-inheritance:
36+
2937
Module contents
3038
---------------
3139

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.requesters.request_options.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,14 @@
22
Submodules
33
----------
44

5+
airbyte\_cdk.sources.declarative.requesters.request\_options.interpolated\_request\_input\_provider module
6+
----------------------------------------------------------------------------------------------------------
7+
8+
.. automodule:: airbyte_cdk.sources.declarative.requesters.request_options.interpolated_request_input_provider
9+
:members:
10+
:undoc-members:
11+
:show-inheritance:
12+
513
airbyte\_cdk.sources.declarative.requesters.request\_options.interpolated\_request\_options\_provider module
614
------------------------------------------------------------------------------------------------------------
715

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.requesters.rst

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,6 @@ airbyte\_cdk.sources.declarative.requesters.http\_requester module
2020
:undoc-members:
2121
:show-inheritance:
2222

23-
airbyte\_cdk.sources.declarative.requesters.interpolated\_request\_input\_provider module
24-
-----------------------------------------------------------------------------------------
25-
26-
.. automodule:: airbyte_cdk.sources.declarative.requesters.interpolated_request_input_provider
27-
:members:
28-
:undoc-members:
29-
:show-inheritance:
30-
3123
airbyte\_cdk.sources.declarative.requesters.request\_option module
3224
------------------------------------------------------------------
3325

airbyte-cdk/python/reference_docs/_source/api/airbyte_cdk.sources.declarative.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,10 @@ airbyte\_cdk.sources.declarative.declarative\_stream module
4545
:undoc-members:
4646
:show-inheritance:
4747

48-
airbyte\_cdk.sources.declarative.read\_exception module
49-
-------------------------------------------------------
48+
airbyte\_cdk.sources.declarative.exceptions module
49+
--------------------------------------------------
5050

51-
.. automodule:: airbyte_cdk.sources.declarative.read_exception
51+
.. automodule:: airbyte_cdk.sources.declarative.exceptions
5252
:members:
5353
:undoc-members:
5454
:show-inheritance:

airbyte-cdk/python/setup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515

1616
setup(
1717
name="airbyte-cdk",
18-
version="0.1.73",
18+
version="0.1.74",
1919
description="A framework for writing Airbyte Connectors.",
2020
long_description=README,
2121
long_description_content_type="text/markdown",
@@ -55,7 +55,6 @@
5555
"vcrpy",
5656
"Deprecated~=1.2",
5757
"Jinja2~=3.1.2",
58-
"jello~=1.5.2",
5958
],
6059
python_requires=">=3.9",
6160
extras_require={

0 commit comments

Comments
 (0)