Skip to content

Commit 8ec438a

Browse files
author
Anton Karpets
authored
File-based CDK: allow to merge schemas with nullable object values (#37773)
1 parent abd5bf1 commit 8ec438a

File tree

2 files changed

+17
-8
lines changed

2 files changed

+17
-8
lines changed

airbyte-cdk/python/airbyte_cdk/sources/file_based/schema_helpers.py

+9-7
Original file line numberDiff line numberDiff line change
@@ -104,9 +104,7 @@ def merge_schemas(schema1: SchemaType, schema2: SchemaType) -> SchemaType:
104104
t1 = merged_schema.get(k2)
105105
if t1 is None:
106106
merged_schema[k2] = t2
107-
# do not pass fields with null values to the _choose_wider_type method if their keys are already
108-
# in the merged schema due to issue observed in https://github.com/airbytehq/oncall/issues/4948
109-
elif t1 == t2 or t2["type"] == "null":
107+
elif t1 == t2:
110108
continue
111109
else:
112110
merged_schema[k2] = _choose_wider_type(k2, t1, t2)
@@ -119,23 +117,27 @@ def _is_valid_type(t: JsonSchemaSupportedType) -> bool:
119117

120118

121119
def _choose_wider_type(key: str, t1: Mapping[str, Any], t2: Mapping[str, Any]) -> Mapping[str, Any]:
122-
if (t1["type"] == "array" or t2["type"] == "array") and t1 != t2:
120+
t1_type = t1["type"]
121+
t2_type = t2["type"]
122+
123+
if (t1_type == "array" or t2_type == "array") and t1 != t2:
123124
raise SchemaInferenceError(
124125
FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
125126
details="Cannot merge schema for unequal array types.",
126127
key=key,
127128
detected_types=f"{t1},{t2}",
128129
)
129-
elif (t1["type"] == "object" or t2["type"] == "object") and t1 != t2:
130+
# A "null" type on either side does not conflict with an "object" type on the other, so skip this error and let the schemas be merged
131+
elif (t1_type == "object" or t2_type == "object") and t1_type != "null" and t2_type != "null" and t1 != t2:
130132
raise SchemaInferenceError(
131133
FileBasedSourceError.SCHEMA_INFERENCE_ERROR,
132134
details="Cannot merge schema for unequal object types.",
133135
key=key,
134136
detected_types=f"{t1},{t2}",
135137
)
136138
else:
137-
comparable_t1 = get_comparable_type(TYPE_PYTHON_MAPPING[t1["type"]][0]) # accessing the type_mapping value
138-
comparable_t2 = get_comparable_type(TYPE_PYTHON_MAPPING[t2["type"]][0]) # accessing the type_mapping value
139+
comparable_t1 = get_comparable_type(TYPE_PYTHON_MAPPING[t1_type][0]) # accessing the type_mapping value
140+
comparable_t2 = get_comparable_type(TYPE_PYTHON_MAPPING[t2_type][0]) # accessing the type_mapping value
139141
if not comparable_t1 and comparable_t2:
140142
raise SchemaInferenceError(FileBasedSourceError.UNRECOGNIZED_TYPE, key=key, detected_types=f"{t1},{t2}")
141143
return max(

airbyte-cdk/python/unit_tests/sources/file_based/test_schema_helpers.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -264,13 +264,20 @@ def test_comparable_types() -> None:
264264
{"a": {"type": "object"}},
265265
{"a": {"type": "null"}},
266266
{"a": {"type": "object"}},
267-
id="single-key-with-null-object-schema2"),
267+
id="single-key-with-null-object-schema2",
268+
),
268269
pytest.param(
269270
{"a": {"type": "object"}},
270271
{"b": {"type": "null"}},
271272
{"a": {"type": "object"}, "b": {"type": "null"}},
272273
id="new-key-with-null-type",
273274
),
275+
pytest.param(
276+
{"a": {"type": "null"}},
277+
{"a": {"type": "object"}},
278+
{"a": {"type": "object"}},
279+
id="single-key-with-null-object-schema1",
280+
),
274281
],
275282
)
276283
def test_merge_schemas(schema1: SchemaType, schema2: SchemaType, expected_result: Optional[SchemaType]) -> None:

0 commit comments

Comments
 (0)