Skip to content

Commit dccb2fa

Browse files
authored
CAT: set additionalProperties recursively for objects (#34448)
1 parent b37efe9 commit dccb2fa

File tree

3 files changed

+95
-17
lines changed

3 files changed

+95
-17
lines changed

airbyte-integrations/bases/connector-acceptance-test/CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
# Changelog
22

3+
## 3.3.2
4+
Fix TestBasicRead.test_read.validate_schema: set `additionalProperties` to False recursively for objects
5+
36
## 3.3.1
47
Fix TestSpec.test_oauth_is_default_method to skip connectors that doesn't have predicate_key object.
58

airbyte-integrations/bases/connector-acceptance-test/connector_acceptance_test/utils/asserts.py

Lines changed: 36 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
from collections import defaultdict
99
from typing import Any, Dict, List, Mapping
1010

11-
import dpath.util
1211
import pendulum
1312
from airbyte_protocol.models import AirbyteRecordMessage, ConfiguredAirbyteCatalog
1413
from jsonschema import Draft7Validator, FormatChecker, FormatError, ValidationError, validators
@@ -26,6 +25,40 @@
2625
Draft7ValidatorWithStrictInteger = validators.extend(Draft7Validator, type_checker=strict_integer_type_checker)
2726

2827

28+
class NoAdditionalPropertiesValidator(Draft7Validator):
29+
def __init__(self, schema, **kwargs):
30+
schema = self._enforce_false_additional_properties(schema)
31+
super().__init__(schema, **kwargs)
32+
33+
@staticmethod
34+
def _enforce_false_additional_properties(json_schema: Dict[str, Any]) -> Dict[str, Any]:
35+
"""Create a copy of the schema in which `additionalProperties` is set to False for all non-null object properties.
36+
37+
This method will override the value of `additionalProperties` if it is set,
38+
or will create the property and set it to False if it does not exist.
39+
"""
40+
new_schema = copy.deepcopy(json_schema)
41+
new_schema["additionalProperties"] = False
42+
43+
def add_properties(properties):
44+
for prop_name, prop_value in properties.items():
45+
if "type" in prop_value and "object" in prop_value["type"] and len(prop_value.get("properties", [])):
46+
prop_value["additionalProperties"] = False
47+
add_properties(prop_value.get("properties", {}))
48+
elif "type" in prop_value and "array" in prop_value["type"]:
49+
if (
50+
prop_value.get("items")
51+
and "object" in prop_value.get("items", {}).get("type")
52+
and len(prop_value.get("items", {}).get("properties", []))
53+
):
54+
prop_value["items"]["additionalProperties"] = False
55+
if prop_value.get("items", {}).get("properties"):
56+
add_properties(prop_value["items"]["properties"])
57+
58+
add_properties(new_schema.get("properties", {}))
59+
return new_schema
60+
61+
2962
class CustomFormatChecker(FormatChecker):
3063
@staticmethod
3164
def check_datetime(value: str) -> bool:
@@ -46,17 +79,6 @@ def check(self, instance, format):
4679
return super().check(instance, format)
4780

4881

49-
def _enforce_no_additional_top_level_properties(json_schema: Dict[str, Any]):
50-
"""Create a copy of the schema in which `additionalProperties` is set to False for the dict of top-level properties.
51-
52-
This method will override the value of `additionalProperties` if it is set,
53-
or will create the property and set it to False if it does not exist.
54-
"""
55-
enforced_schema = copy.deepcopy(json_schema)
56-
dpath.util.new(enforced_schema, "additionalProperties", False)
57-
return enforced_schema
58-
59-
6082
def verify_records_schema(
6183
records: List[AirbyteRecordMessage], catalog: ConfiguredAirbyteCatalog, fail_on_extra_columns: bool
6284
) -> Mapping[str, Mapping[str, ValidationError]]:
@@ -66,11 +88,8 @@ def verify_records_schema(
6688
stream_validators = {}
6789
for stream in catalog.streams:
6890
schema_to_validate_against = stream.stream.json_schema
69-
if fail_on_extra_columns:
70-
schema_to_validate_against = _enforce_no_additional_top_level_properties(schema_to_validate_against)
71-
stream_validators[stream.stream.name] = Draft7ValidatorWithStrictInteger(
72-
schema_to_validate_against, format_checker=CustomFormatChecker()
73-
)
91+
validator = NoAdditionalPropertiesValidator if fail_on_extra_columns else Draft7ValidatorWithStrictInteger
92+
stream_validators[stream.stream.name] = validator(schema_to_validate_against, format_checker=CustomFormatChecker())
7493
stream_errors = defaultdict(dict)
7594
for record in records:
7695
validator = stream_validators.get(record.stream)

airbyte-integrations/bases/connector-acceptance-test/unit_tests/test_asserts.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,62 @@ def test_verify_records_schema(configured_catalog: ConfiguredAirbyteCatalog):
8787
]
8888

8989

90+
@pytest.mark.parametrize(
91+
"json_schema, record, should_fail",
92+
[
93+
(
94+
{"type": "object", "properties": {"a": {"type": "string"}}},
95+
{"a": "str", "b": "extra_string"},
96+
True
97+
),
98+
(
99+
{"type": "object", "properties": {"a": {"type": "string"}, "some_obj": {"type": ["null", "object"]}}},
100+
{"a": "str", "some_obj": {"b": "extra_string"}},
101+
False
102+
),
103+
(
104+
{
105+
"type": "object",
106+
"properties": {"a": {"type": "string"}, "some_obj": {"type": ["null", "object"], "properties": {"a": {"type": "string"}}}},
107+
},
108+
{"a": "str", "some_obj": {"a": "str", "b": "extra_string"}},
109+
True
110+
),
111+
112+
(
113+
{"type": "object", "properties": {"a": {"type": "string"}, "b": {"type": "array", "items": {"type": "object"}}}},
114+
{"a": "str", "b": [{"a": "extra_string"}]},
115+
False
116+
),
117+
(
118+
{
119+
"type": "object",
120+
"properties": {
121+
"a": {"type": "string"},
122+
"b": {"type": "array", "items": {"type": "object", "properties": {"a": {"type": "string"}}}},
123+
}
124+
},
125+
{"a": "str", "b": [{"a": "string", "b": "extra_string"}]},
126+
True
127+
),
128+
],
129+
ids=[
130+
"simple_schema_and_record_with_extra_property",
131+
"schema_with_object_without_properties_and_record_with_object_with_property",
132+
"schema_with_object_with_properties_and_record_with_object_with_extra_property",
133+
"schema_with_array_of_objects_without_properties_and_record_with_array_of_objects_with_property",
134+
"schema_with_array_of_objects_with_properties_and_record_with_array_of_objects_with_extra_property",
135+
],
136+
)
137+
def test_verify_records_schema_with_fail_on_extra_columns(configured_catalog: ConfiguredAirbyteCatalog, json_schema, record, should_fail):
138+
"""Test that fail_on_extra_columns works correctly with nested objects, array of objects"""
139+
configured_catalog.streams[0].stream.json_schema =json_schema
140+
records = [AirbyteRecordMessage(stream="my_stream", data=record, emitted_at=0)]
141+
streams_with_errors = verify_records_schema(records, configured_catalog, fail_on_extra_columns=True)
142+
errors = [error.message for error in streams_with_errors["my_stream"].values()]
143+
assert errors if should_fail else not errors
144+
145+
90146
@pytest.mark.parametrize(
91147
"record, configured_catalog, valid",
92148
[

0 commit comments

Comments
 (0)