Skip to content

Commit 9e35a88

Browse files
szubsternatikgadzhigirarda
authored
Improve performance of interpolation in declarative sources (#44027)
Co-authored-by: Natik Gadzhi <[email protected]> Co-authored-by: Alexandre Girard <[email protected]>
1 parent 3b469ea commit 9e35a88

File tree

3 files changed

+47
-5
lines changed

3 files changed

+47
-5
lines changed

airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/interpolated_string.py

+11
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ def __post_init__(self, parameters: Mapping[str, Any]) -> None:
2828
self.default = self.default or self.string
2929
self._interpolation = JinjaInterpolation()
3030
self._parameters = parameters
31+
# indicates whether passed string is just a plain string, not Jinja template
32+
# This allows for optimization, but we do not know it yet at this stage
33+
self._is_plain_string = None
3134

3235
def eval(self, config: Config, **kwargs: Any) -> Any:
3336
"""
@@ -37,6 +40,14 @@ def eval(self, config: Config, **kwargs: Any) -> Any:
3740
:param kwargs: Optional parameters used for interpolation
3841
:return: The interpolated string
3942
"""
43+
if self._is_plain_string:
44+
return self.string
45+
if self._is_plain_string is None:
46+
# Let's check whether output from evaluation is the same as input.
47+
# This indicates the occurrence of a plain string, not a template, so we can skip Jinja in subsequent runs.
48+
evaluated = self._interpolation.eval(self.string, config, self.default, parameters=self._parameters, **kwargs)
49+
self._is_plain_string = self.string == evaluated
50+
return evaluated
4051
return self._interpolation.eval(self.string, config, self.default, parameters=self._parameters, **kwargs)
4152

4253
def __eq__(self, other: Any) -> bool:

airbyte-cdk/python/airbyte_cdk/sources/declarative/interpolation/jinja.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,15 @@
33
#
44

55
import ast
6+
from functools import cache
67
from typing import Any, Mapping, Optional, Tuple, Type
78

89
from airbyte_cdk.sources.declarative.interpolation.filters import filters
910
from airbyte_cdk.sources.declarative.interpolation.interpolation import Interpolation
1011
from airbyte_cdk.sources.declarative.interpolation.macros import macros
1112
from airbyte_cdk.sources.types import Config
1213
from jinja2 import meta
14+
from jinja2.environment import Template
1315
from jinja2.exceptions import UndefinedError
1416
from jinja2.sandbox import SandboxedEnvironment
1517

@@ -114,13 +116,27 @@ def _literal_eval(self, result: Optional[str], valid_types: Optional[Tuple[Type[
114116

115117
def _eval(self, s: Optional[str], context: Mapping[str, Any]) -> Optional[str]:
116118
try:
117-
ast = self._environment.parse(s) # type: ignore # parse is able to handle None
118-
undeclared = meta.find_undeclared_variables(ast)
119+
undeclared = self._find_undeclared_variables(s)
119120
undeclared_not_in_context = {var for var in undeclared if var not in context}
120121
if undeclared_not_in_context:
121122
raise ValueError(f"Jinja macro has undeclared variables: {undeclared_not_in_context}. Context: {context}")
122-
return self._environment.from_string(s).render(context) # type: ignore # from_string is able to handle None
123+
return self._compile(s).render(context) # type: ignore # from_string is able to handle None
123124
except TypeError:
124125
# The string is a static value, not a jinja template
125126
# It can be returned as is
126127
return s
128+
129+
@cache
def _find_undeclared_variables(self, s: Optional[str]) -> set[str]:
    """
    Find the variables that the template `s` references but does not declare itself.

    The result is memoized with `functools.cache`, keyed on `(self, s)`, so a given template string
    is parsed only once per instance.

    :param s: The template source to inspect
    :return: The names of the undeclared variables found in the parsed template
    """
    # NOTE(review): `@cache` on a method keeps a strong reference to `self` for the lifetime of the
    # cache and grows with every distinct `s` - confirm instances are few and long-lived.
    # The local is named `parsed` rather than `ast` so the module-level `import ast` is not shadowed.
    parsed = self._environment.parse(s)  # type: ignore # parse is able to handle None
    return meta.find_undeclared_variables(parsed)
136+
137+
@cache
def _compile(self, s: Optional[str]) -> Template:
    """
    Build the Jinja template for `s` and memoize it.

    Templates are created through `from_string` rather than a template loader, so the caching has to be done here.

    :param s: The template source to compile
    :return: The template produced by the environment for `s`
    """
    template = self._environment.from_string(s)
    return template

airbyte-cdk/python/unit_tests/sources/declarative/interpolation/test_jinja.py

+17-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,13 @@ def test_get_value_from_config():
2020
assert val == "2022-01-01"
2121

2222

23+
def test_get_missing_value_from_config():
    """A template that reads a key absent from the config evaluates to None."""
    template = "{{ config['date'] }}"
    empty_config = {}
    assert interpolation.eval(template, empty_config) is None
28+
29+
2330
@pytest.mark.parametrize(
2431
"valid_types, expected_value",
2532
[
@@ -42,6 +49,14 @@ def test_get_value_from_stream_slice():
4249
assert val == "2020-09-09"
4350

4451

52+
def test_get_missing_value_from_stream_slice():
    """A template that reads a key absent from the stream slice evaluates to None."""
    template = "{{ stream_slice['date'] }}"
    config = {"date": "2022-01-01"}
    empty_slice = {}
    result = interpolation.eval(template, config, stream_slice=empty_slice)
    assert result is None
58+
59+
4560
def test_get_value_from_a_list_of_mappings():
4661
s = "{{ records[0]['date'] }}"
4762
config = {"date": "2022-01-01"}
@@ -142,7 +157,7 @@ def test_negative_day_delta():
142157
("test_false_to_string", False, "false"),
143158
("test_array_to_string", ["hello", "world"], '["hello", "world"]'),
144159
("test_object_to_array", {"hello": "world"}, '{"hello": "world"}'),
145-
]
160+
],
146161
)
147162
def test_to_string(test_name, input_value, expected_output):
148163
interpolation = JinjaInterpolation()
@@ -254,7 +269,7 @@ def test_undeclared_variables(template_string, expected_error, expected_value):
254269
id="test_now_utc_with_duration_and_format",
255270
),
256271
pytest.param("{{ 1 | string }}", "1", id="test_int_to_string"),
257-
pytest.param("{{ [\"hello\", \"world\"] | string }}", "[\"hello\", \"world\"]", id="test_array_to_string"),
272+
pytest.param('{{ ["hello", "world"] | string }}', '["hello", "world"]', id="test_array_to_string"),
258273
],
259274
)
260275
def test_macros_examples(template_string, expected_value):

0 commit comments

Comments
 (0)