Skip to content

Commit 2ef1a5a

Browse files
Fix for nullable top-level schemas (#44401)
1 parent 9a9c5f4 commit 2ef1a5a

File tree

18 files changed

+346
-45
lines changed

18 files changed

+346
-45
lines changed
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.44.14
1+
version=0.44.15
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":"string1","union_type":10,"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[25,null,"[\"goodbye\",\"cruel world\"]"],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "empty_object": "{}", "object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null}
2-
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":20,"union_type":"string2","schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null}
3-
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting":null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null }
1+
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":"string1","union_type":10,"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[25,null,"[\"goodbye\",\"cruel world\"]"],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "empty_object": "{}", "object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null, "compact_union": { "id": 10, "name": "Tyler" } }
2+
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":20,"union_type":"string2","schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null, "compact_union": 4444 }
3+
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting":null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null, "compact_union": null }

airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/resources/v0/problematic_types_coerced_schemaless_schema.json

+18
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,24 @@
154154
},
155155
"combined_nulls": {
156156
"type": "null"
157+
},
158+
"compact_union": {
159+
"oneOf": [
160+
{
161+
"type": "object",
162+
"properties": {
163+
"id": {
164+
"type": "integer"
165+
},
166+
"name": {
167+
"type": "string"
168+
}
169+
}
170+
},
171+
{
172+
"type": "integer"
173+
}
174+
]
157175
}
158176
}
159177
}

airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/resources/v0/problematic_types_configured_catalog.json

+11
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,17 @@
189189
},
190190
"combined_nulls": {
191191
"type": ["null", "null"]
192+
},
193+
"compact_union": {
194+
"type": ["object", "integer"],
195+
"properties": {
196+
"id": {
197+
"type": "integer"
198+
},
199+
"name": {
200+
"type": "string"
201+
}
202+
}
192203
}
193204
}
194205
}
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":{"type":"string","string":"string1","integer":null},"union_type":{"type":"integer","string":null,"integer":10},"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[{"type":"integer","integer":25,"string":null},null,{"type":"string","integer":null,"string":"[\"goodbye\",\"cruel world\"]"}],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": {"type":"integer","integer":1,"string":null}, "name": "Jenny" }, "empty_object": "{}","object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null":"barfoo", "combined_nulls": null }}
2-
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":{"type":"integer","string":null,"integer":20},"union_type":{"type":"string","string":"string2","integer":null},"schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": {"type":"string","integer":null,"string":"seal-one-hippity"}, "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null}
3-
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null }
1+
{"schemaless_object":"{\"uuid\":\"38F52396-736D-4B23-B5B4-F504D8894B97\",\"probability\":1.5}","schematized_object":{"id":1,"name":"Joe"},"combined_type":{"type":"string","string":"string1","integer":null},"union_type":{"type":"integer","string":null,"integer":10},"schemaless_array":"[10,\"foo\",null,{\"bar\":\"qua\"}]","mixed_array_integer_and_schemaless_object":[15,null,"{\"hello\":\"world\"}"],"array_of_union_integer_and_schemaless_array":[{"type":"integer","integer":25,"string":null},null,{"type":"string","integer":null,"string":"[\"goodbye\",\"cruel world\"]"}],"union_of_objects_with_properties_identical":{"id":10,"name":"Joe"},"union_of_objects_with_properties_overlapping":{"id":20,"name":"Jane","flagged":true},"union_of_objects_with_properties_nonoverlapping":{"id":30,"name":"Phil","flagged":false,"description":"Very Phil"}, "union_of_objects_with_properties_contradicting": { "id": {"type":"integer","integer":1,"string":null}, "name": "Jenny" }, "empty_object": "{}","object_with_null_properties": "{}", "combined_with_null": "foobar", "union_with_null":"barfoo", "combined_nulls": null, "compact_union": {"type": "object", "object": { "id": 10, "name": "Tyler" }, "integer": null } }
2+
{"schemaless_object":"{\"address\":{\"street\":\"113 Hickey Rd\",\"zip\":\"37932\"},\"flags\":[true,false,false]}","schematized_object":{"id":2,"name":"Jane"},"combined_type":{"type":"integer","string":null,"integer":20},"union_type":{"type":"string","string":"string2","integer":null},"schemaless_array":"[]","mixed_array_integer_and_schemaless_object":[],"array_of_union_integer_and_schemaless_array":[],"union_of_objects_with_properties_identical":{"id":null,"name":null},"union_of_objects_with_properties_overlapping":{"id":null,"name":null,"flagged":null},"union_of_objects_with_properties_nonoverlapping":{"id":null,"name":null,"flagged":null,"description":null}, "union_of_objects_with_properties_contradicting": { "id": {"type":"string","integer":null,"string":"seal-one-hippity"}, "name": "James" }, "empty_object": "{\"extra\":\"stuff\"}", "object_with_null_properties": "{\"more\":{\"extra\":\"stuff\"}}", "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null, "compact_union": {"type":"integer","integer":4444,"object":null} }
3+
{ "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "union_of_objects_with_properties_contradicting": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null, "compact_union": null }

airbyte-cdk/java/airbyte-cdk/db-destinations/src/testFixtures/resources/v0/problematic_types_disjoint_union_schema.json

+18
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,24 @@
163163
},
164164
"combined_nulls": {
165165
"type": "null"
166+
},
167+
"compact_union": {
168+
"oneOf": [
169+
{
170+
"type": "object",
171+
"properties": {
172+
"id": {
173+
"type": "integer"
174+
},
175+
"name": {
176+
"type": "string"
177+
}
178+
}
179+
},
180+
{
181+
"type": "integer"
182+
}
183+
]
166184
}
167185
}
168186
}
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589100, "data": { "schemaless_object": { "uuid": "38F52396-736D-4B23-B5B4-F504D8894B97", "probability": 1.5 }, "schematized_object": { "id": 1, "name": "Joe" }, "combined_type": "string1", "union_type": 10, "schemaless_array": [ 10, "foo", null, { "bar": "qua" } ], "mixed_array_integer_and_schemaless_object": [ 15, null, { "hello": "world" } ], "array_of_union_integer_and_schemaless_array": [ 25, null, ["goodbye", "cruel world"] ], "union_of_objects_with_properties_identical": { "id": 10, "name": "Joe" }, "union_of_objects_with_properties_overlapping": { "id": 20, "name": "Jane", "flagged": true }, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "union_of_objects_with_properties_nonoverlapping": { "id": 30, "name": "Phil", "flagged": false, "description":"Very Phil" }, "empty_object": {},"object_with_null_properties": {}, "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null } } }
2-
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589200, "data": { "schemaless_object": { "address": { "street": "113 Hickey Rd", "zip": "37932" }, "flags": [ true, false, false ] }, "schematized_object": { "id": 2, "name": "Jane" }, "combined_type": 20, "union_type": "string2", "schemaless_array": [], "mixed_array_integer_and_schemaless_object": [ ], "array_of_union_integer_and_schemaless_array": [ ], "union_of_objects_with_properties_identical": { }, "union_of_objects_with_properties_overlapping": {}, "union_of_objects_with_properties_nonoverlapping": {}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": {"extra": "stuff"}, "object_with_null_properties": { "more": { "extra": "stuff" } }, "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null } } }
3-
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589300, "data": { "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null } } }
1+
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589100, "data": { "schemaless_object": { "uuid": "38F52396-736D-4B23-B5B4-F504D8894B97", "probability": 1.5 }, "schematized_object": { "id": 1, "name": "Joe" }, "combined_type": "string1", "union_type": 10, "schemaless_array": [ 10, "foo", null, { "bar": "qua" } ], "mixed_array_integer_and_schemaless_object": [ 15, null, { "hello": "world" } ], "array_of_union_integer_and_schemaless_array": [ 25, null, ["goodbye", "cruel world"] ], "union_of_objects_with_properties_identical": { "id": 10, "name": "Joe" }, "union_of_objects_with_properties_overlapping": { "id": 20, "name": "Jane", "flagged": true }, "union_of_objects_with_properties_contradicting": { "id": 1, "name": "Jenny" }, "union_of_objects_with_properties_nonoverlapping": { "id": 30, "name": "Phil", "flagged": false, "description":"Very Phil" }, "empty_object": {},"object_with_null_properties": {}, "combined_with_null": "foobar", "union_with_null": "barfoo", "combined_nulls": null, "compact_union": { "id": 10, "name": "Tyler" } } } }
2+
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589200, "data": { "schemaless_object": { "address": { "street": "113 Hickey Rd", "zip": "37932" }, "flags": [ true, false, false ] }, "schematized_object": { "id": 2, "name": "Jane" }, "combined_type": 20, "union_type": "string2", "schemaless_array": [], "mixed_array_integer_and_schemaless_object": [ ], "array_of_union_integer_and_schemaless_array": [ ], "union_of_objects_with_properties_identical": { }, "union_of_objects_with_properties_overlapping": {}, "union_of_objects_with_properties_nonoverlapping": {}, "union_of_objects_with_properties_contradicting": { "id": "seal-one-hippity", "name": "James" }, "empty_object": {"extra": "stuff"}, "object_with_null_properties": { "more": { "extra": "stuff" } }, "combined_with_null": "foobar2", "union_with_null": "barfoo2", "combined_nulls": null, "compact_union": 4444 } } }
3+
{"type": "RECORD", "record": {"stream": "problematic_types", "emitted_at": 1602637589300, "data": { "schemaless_object": null, "schematized_object": null, "combined_type": null, "union_type": null, "schemaless_array": null, "mixed_array_integer_and_schemaless_object": null, "array_of_union_integer_and_schemaless_array": null, "union_of_objects_with_properties_identical": null, "union_of_objects_with_properties_overlapping": null, "union_of_objects_with_properties_nonoverlapping": null, "empty_object": null, "object_with_null_properties": null, "combined_with_null": null, "union_with_null": null, "combined_nulls": null, "compact_union": null } } }
44
{"type": "STATE", "state": { "data": {"start_date": "2022-02-14"}}}
55
{"type": "TRACE", "trace": { "type": "STREAM_STATUS", "stream_status": {"stream_descriptor": {"name": "problematic_types"}, "status": "COMPLETE"}, "emitted_at": 1721428636000}}

airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/s3/jsonschema/AirbyteJsonSchemaType.kt

+14-5
Original file line numberDiff line numberDiff line change
@@ -84,15 +84,22 @@ enum class AirbyteJsonSchemaType {
8484

8585
val type = schema["type"]
8686
if (type != null) {
87-
if (type.isArray && type.size() > 1) {
87+
val typeArray =
88+
if (type.isArray) {
89+
type.elements().asSequence().filter { it.asText() != "null" }.toList()
90+
} else {
91+
listOf(type)
92+
}
93+
94+
if (typeArray.size > 1) {
8895
return COMBINED
8996
}
9097

9198
val typeStr =
92-
if (type.isArray) {
93-
type[0].asText()
99+
if (typeArray.isEmpty()) {
100+
"null"
94101
} else {
95-
type.asText()
102+
typeArray[0].asText()
96103
}
97104

98105
val format = schema["format"]?.asText()
@@ -233,7 +240,9 @@ enum class AirbyteJsonSchemaType {
233240
}
234241
.toList()
235242
if (matching.isEmpty()) {
236-
throw IllegalArgumentException("Union type does not match any options")
243+
throw IllegalArgumentException(
244+
"Union type ${value::class}(value redacted) does not match any options: $optionsAsList"
245+
)
237246
}
238247
return matching.first()
239248
}

0 commit comments

Comments
 (0)