Skip to content

Commit f483396

Browse files
authored
migrate JsonSchemas to use basic path instead of JSONPath (#13917)
1 parent 94abef3 commit f483396

File tree

7 files changed

+236
-66
lines changed

7 files changed

+236
-66
lines changed

airbyte-commons/src/main/java/io/airbyte/commons/json/JsonPaths.java

+15
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import com.jayway.jsonpath.spi.json.JsonProvider;
1616
import com.jayway.jsonpath.spi.mapper.JacksonMappingProvider;
1717
import com.jayway.jsonpath.spi.mapper.MappingProvider;
18+
import io.airbyte.commons.json.JsonSchemas.FieldNameOrList;
1819
import io.airbyte.commons.util.MoreIterators;
1920
import java.util.Collections;
2021
import java.util.EnumSet;
@@ -94,6 +95,20 @@ public static String appendAppendListSplat(final String jsonPath) {
9495
return jsonPath + JSON_PATH_LIST_SPLAT;
9596
}
9697

98+
/**
99+
* Map path produced by {@link JsonSchemas} to the JSONPath format.
100+
*
101+
* @param jsonSchemaPath - path as described in {@link JsonSchemas}
102+
* @return path as JSONPath
103+
*/
104+
public static String mapJsonSchemaPathToJsonPath(final List<FieldNameOrList> jsonSchemaPath) {
105+
String jsonPath = empty();
106+
for (final FieldNameOrList fieldNameOrList : jsonSchemaPath) {
107+
jsonPath = fieldNameOrList.isList() ? appendAppendListSplat(jsonPath) : appendField(jsonPath, fieldNameOrList.getFieldName());
108+
}
109+
return jsonPath;
110+
}
111+
97112
/*
98113
* This version of the JsonPath Configuration object allows queries to return to the path of values
99114
* instead of the values that were found.

airbyte-commons/src/main/java/io/airbyte/commons/json/JsonSchemas.java

+136-30
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,20 @@
66

77
import com.fasterxml.jackson.databind.JsonNode;
88
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import com.google.common.base.Preconditions;
910
import io.airbyte.commons.io.IOs;
1011
import io.airbyte.commons.resources.MoreResources;
1112
import io.airbyte.commons.util.MoreIterators;
13+
import io.airbyte.commons.util.MoreLists;
1214
import java.io.IOException;
1315
import java.nio.file.Files;
1416
import java.nio.file.Path;
1517
import java.util.ArrayList;
16-
import java.util.Collection;
1718
import java.util.Collections;
18-
import java.util.HashSet;
1919
import java.util.Iterator;
2020
import java.util.List;
2121
import java.util.Map.Entry;
22+
import java.util.Objects;
2223
import java.util.Optional;
2324
import java.util.Set;
2425
import java.util.function.BiConsumer;
@@ -95,8 +96,33 @@ public static <T> Path prepareSchemas(final String resourceDir, final Class<T> k
9596
}
9697
}
9798

98-
public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiConsumer<JsonNode, String> consumer) {
99-
traverseJsonSchemaInternal(jsonSchemaNode, JsonPaths.empty(), consumer);
99+
/**
100+
* Traverse a JsonSchema object. The provided consumer will be called at each node with the node and
101+
* the path to the node.
102+
*
103+
* @param jsonSchema - JsonSchema object to traverse
104+
* @param consumer - accepts the current node and the path to that node.
105+
*/
106+
public static void traverseJsonSchema(final JsonNode jsonSchema, final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
107+
traverseJsonSchemaInternal(jsonSchema, new ArrayList<>(), consumer);
108+
}
109+
110+
/**
111+
* Traverse a JsonSchema object. At each node, map a value.
112+
*
113+
* @param jsonSchema - JsonSchema object to traverse
114+
* @param mapper - accepts the current node and the path to that node. whatever is returned will be
115+
* collected and returned by the final collection.
116+
* @param <T> - type of objects being collected
117+
* @return - collection of all items that were collected during the traversal. Returns a { @link
118+
* Collection } because there is no order or uniqueness guarantee so neither List nor Set
119+
* make sense.
120+
*/
121+
public static <T> List<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema,
122+
final BiFunction<JsonNode, List<FieldNameOrList>, T> mapper) {
123+
// for the sake of code reuse, use the filtered collector method but makes sure the filter always
124+
// returns true.
125+
return traverseJsonSchemaWithFilteredCollector(jsonSchema, (node, path) -> Optional.ofNullable(mapper.apply(node, path)));
100126
}
101127

102128
/**
@@ -111,44 +137,45 @@ public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiCon
111137
* Collection } because there is no order or uniqueness guarantee so neither List nor Set
112138
* make sense.
113139
*/
114-
public static <T> Collection<T> traverseJsonSchemaWithCollector(final JsonNode jsonSchema, final BiFunction<JsonNode, String, Optional<T>> mapper) {
115-
final List<T> collectors = new ArrayList<>();
116-
traverseJsonSchema(jsonSchema, (node, path) -> mapper.apply(node, path).ifPresent(collectors::add));
117-
return collectors;
140+
public static <T> List<T> traverseJsonSchemaWithFilteredCollector(final JsonNode jsonSchema,
141+
final BiFunction<JsonNode, List<FieldNameOrList>, Optional<T>> mapper) {
142+
final List<T> collector = new ArrayList<>();
143+
traverseJsonSchema(jsonSchema, (node, path) -> mapper.apply(node, path).ifPresent(collector::add));
144+
return collector.stream().toList(); // make list unmodifiable
118145
}
119146

120147
/**
121148
* Traverses a JsonSchema object. It returns the path to each node that meet the provided condition.
122-
* The paths are return in JsonPath format
149+
* The paths are returned as lists of {@link FieldNameOrList}. The traversal is depth-first search pre-order and
150+
* values are returned in that order.
123151
*
124152
* @param obj - JsonSchema object to traverse
125153
* @param predicate - predicate to determine if the path for a node should be collected.
126154
* @return - collection of all paths that were collected during the traversal.
127155
*/
128-
public static Set<String> collectJsonPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
129-
return new HashSet<>(traverseJsonSchemaWithCollector(obj, (node, path) -> {
156+
public static List<List<FieldNameOrList>> collectPathsThatMeetCondition(final JsonNode obj, final Predicate<JsonNode> predicate) {
157+
return traverseJsonSchemaWithFilteredCollector(obj, (node, path) -> {
130158
if (predicate.test(node)) {
131159
return Optional.of(path);
132160
} else {
133161
return Optional.empty();
134162
}
135-
}));
163+
});
136164
}
137165

138166
/**
139167
* Recursive, depth-first implementation of {@link JsonSchemas#traverseJsonSchema(JsonNode,
140168
* BiConsumer)}. Takes path as argument so that
141-
* the path can be passsed to the consumer.
169+
* the path can be passed to the consumer.
142170
*
143171
* @param jsonSchemaNode - jsonschema object to traverse.
144-
* @param path - path from the first call of traverseJsonSchema to the current node.
145172
* @param consumer - consumer to be called at each node. it accepts the current node and the path to
146173
* the node from the root of the object passed at the root level invocation
174+
* @param path - path from the first call of traverseJsonSchema to the current node.
147175
*/
148-
// todo (cgardens) - replace with easier to understand traversal logic from SecretsHelper.
149176
private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
150-
final String path,
151-
final BiConsumer<JsonNode, String> consumer) {
177+
final List<FieldNameOrList> path,
178+
final BiConsumer<JsonNode, List<FieldNameOrList>> consumer) {
152179
if (!jsonSchemaNode.isObject()) {
153180
throw new IllegalArgumentException(String.format("json schema nodes should always be object nodes. path: %s actual: %s", path, jsonSchemaNode));
154181
}
@@ -162,23 +189,20 @@ private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
162189
switch (nodeType) {
163190
// case BOOLEAN_TYPE, NUMBER_TYPE, STRING_TYPE, NULL_TYPE -> do nothing after consumer.accept above.
164191
case ARRAY_TYPE -> {
165-
final String newPath = JsonPaths.appendAppendListSplat(path);
192+
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.list());
166193
// hit every node.
167-
// log.error("array: " + jsonSchemaNode);
168194
traverseJsonSchemaInternal(jsonSchemaNode.get(JSON_SCHEMA_ITEMS_KEY), newPath, consumer);
169195
}
170196
case OBJECT_TYPE -> {
171197
final Optional<String> comboKeyWordOptional = getKeywordIfComposite(jsonSchemaNode);
172198
if (jsonSchemaNode.has(JSON_SCHEMA_PROPERTIES_KEY)) {
173199
for (final Iterator<Entry<String, JsonNode>> it = jsonSchemaNode.get(JSON_SCHEMA_PROPERTIES_KEY).fields(); it.hasNext();) {
174200
final Entry<String, JsonNode> child = it.next();
175-
final String newPath = JsonPaths.appendField(path, child.getKey());
176-
// log.error("obj1: " + jsonSchemaNode);
201+
final List<FieldNameOrList> newPath = MoreLists.add(path, FieldNameOrList.fieldName(child.getKey()));
177202
traverseJsonSchemaInternal(child.getValue(), newPath, consumer);
178203
}
179204
} else if (comboKeyWordOptional.isPresent()) {
180205
for (final JsonNode arrayItem : jsonSchemaNode.get(comboKeyWordOptional.get())) {
181-
// log.error("obj2: " + jsonSchemaNode);
182206
traverseJsonSchemaInternal(arrayItem, path, consumer);
183207
}
184208
} else {
@@ -206,30 +230,112 @@ private static Optional<String> getKeywordIfComposite(final JsonNode node) {
206230
return Optional.empty();
207231
}
208232

209-
public static List<String> getTypeOrObject(final JsonNode jsonNode) {
210-
final List<String> types = getType(jsonNode);
233+
/**
234+
* Same logic as {@link #getType(JsonNode)} except when no type is found, it defaults to type:
235+
* Object.
236+
*
237+
* @param jsonSchema - JSONSchema object
238+
* @return type of the node.
239+
*/
240+
public static List<String> getTypeOrObject(final JsonNode jsonSchema) {
241+
final List<String> types = getType(jsonSchema);
211242
if (types.isEmpty()) {
212243
return List.of(OBJECT_TYPE);
213244
} else {
214245
return types;
215246
}
216247
}
217248

218-
public static List<String> getType(final JsonNode jsonNode) {
219-
if (jsonNode.has(JSON_SCHEMA_TYPE_KEY)) {
220-
if (jsonNode.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
221-
return MoreIterators.toList(jsonNode.get(JSON_SCHEMA_TYPE_KEY).iterator())
249+
/**
250+
* Get the type of JSONSchema node. Uses JSONSchema types. Only returns the type of the "top-level"
251+
* node. e.g. if more nodes are nested underneath because it is an object or an array, only the top
252+
* level type is returned.
253+
*
254+
* @param jsonSchema - JSONSchema object
255+
* @return type of the node.
256+
*/
257+
public static List<String> getType(final JsonNode jsonSchema) {
258+
if (jsonSchema.has(JSON_SCHEMA_TYPE_KEY)) {
259+
if (jsonSchema.get(JSON_SCHEMA_TYPE_KEY).isArray()) {
260+
return MoreIterators.toList(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).iterator())
222261
.stream()
223262
.map(JsonNode::asText)
224263
.collect(Collectors.toList());
225264
} else {
226-
return List.of(jsonNode.get(JSON_SCHEMA_TYPE_KEY).asText());
265+
return List.of(jsonSchema.get(JSON_SCHEMA_TYPE_KEY).asText());
227266
}
228267
}
229-
if (jsonNode.has(JSON_SCHEMA_ENUM_KEY)) {
268+
if (jsonSchema.has(JSON_SCHEMA_ENUM_KEY)) {
230269
return List.of(STRING_TYPE);
231270
}
232271
return Collections.emptyList();
233272
}
234273

274+
/**
275+
* Provides a basic scheme for describing the path into a JSON object. Each element in the path is
276+
* either a field name or a list.
277+
*
278+
* This class is helpful in the case where fields can be any UTF-8 string, so the only simple way to
279+
* keep track of the different parts of a path without going crazy with escape characters is to keep
280+
* it in a list with list set aside as a special case.
281+
*
282+
* We prefer using this scheme instead of JSONPath in the tree traversal because, it is easier to
283+
* decompose a path in this scheme than it is in JSONPath. Some callers of the traversal logic want
284+
* to isolate parts of the path easily without the need for complex regex (that would be required if
285+
* we used JSONPath).
286+
*/
287+
public static class FieldNameOrList {
288+
289+
private final String fieldName;
290+
private final boolean isList;
291+
292+
public static FieldNameOrList fieldName(final String fieldName) {
293+
return new FieldNameOrList(fieldName);
294+
}
295+
296+
public static FieldNameOrList list() {
297+
return new FieldNameOrList(null);
298+
}
299+
300+
private FieldNameOrList(final String fieldName) {
301+
isList = fieldName == null;
302+
this.fieldName = fieldName;
303+
}
304+
305+
public String getFieldName() {
306+
Preconditions.checkState(!isList, "cannot return field name, is list node");
307+
return fieldName;
308+
}
309+
310+
public boolean isList() {
311+
return isList;
312+
}
313+
314+
@Override
315+
public boolean equals(final Object o) {
316+
if (this == o) {
317+
return true;
318+
}
319+
if (!(o instanceof FieldNameOrList)) {
320+
return false;
321+
}
322+
final FieldNameOrList that = (FieldNameOrList) o;
323+
return isList == that.isList && Objects.equals(fieldName, that.fieldName);
324+
}
325+
326+
@Override
327+
public int hashCode() {
328+
return Objects.hash(fieldName, isList);
329+
}
330+
331+
@Override
332+
public String toString() {
333+
return "FieldNameOrList{" +
334+
"fieldName='" + fieldName + '\'' +
335+
", isList=" + isList +
336+
'}';
337+
}
338+
339+
}
340+
235341
}

airbyte-commons/src/main/java/io/airbyte/commons/util/MoreLists.java

+14
Original file line numberDiff line numberDiff line change
@@ -48,4 +48,18 @@ public static <T> List<T> concat(final List<T>... lists) {
4848
return Stream.of(lists).flatMap(List::stream).toList();
4949
}
5050

51+
/**
52+
* Copies provided list and adds the new item to the copy.
53+
*
54+
* @param list list to copy and add to
55+
* @param toAdd item to add
56+
* @param <T> type of list
57+
* @return new list with contents of provided list and the added item
58+
*/
59+
public static <T> List<T> add(final List<T> list, final T toAdd) {
60+
final ArrayList<T> newList = new ArrayList<>(list);
61+
newList.add(toAdd);
62+
return newList;
63+
}
64+
5165
}

0 commit comments

Comments
 (0)