6
6
7
7
import com .fasterxml .jackson .databind .JsonNode ;
8
8
import com .fasterxml .jackson .databind .node .ObjectNode ;
9
+ import com .google .common .base .Preconditions ;
9
10
import io .airbyte .commons .io .IOs ;
10
11
import io .airbyte .commons .resources .MoreResources ;
11
12
import io .airbyte .commons .util .MoreIterators ;
13
+ import io .airbyte .commons .util .MoreLists ;
12
14
import java .io .IOException ;
13
15
import java .nio .file .Files ;
14
16
import java .nio .file .Path ;
15
17
import java .util .ArrayList ;
16
- import java .util .Collection ;
17
18
import java .util .Collections ;
18
- import java .util .HashSet ;
19
19
import java .util .Iterator ;
20
20
import java .util .List ;
21
21
import java .util .Map .Entry ;
22
+ import java .util .Objects ;
22
23
import java .util .Optional ;
23
24
import java .util .Set ;
24
25
import java .util .function .BiConsumer ;
@@ -95,8 +96,33 @@ public static <T> Path prepareSchemas(final String resourceDir, final Class<T> k
95
96
}
96
97
}
97
98
98
- public static void traverseJsonSchema (final JsonNode jsonSchemaNode , final BiConsumer <JsonNode , String > consumer ) {
99
- traverseJsonSchemaInternal (jsonSchemaNode , JsonPaths .empty (), consumer );
99
+ /**
100
+ * Traverse a JsonSchema object. The provided consumer will be called at each node with the node and
101
+ * the path to the node.
102
+ *
103
+ * @param jsonSchema - JsonSchema object to traverse
104
+ * @param consumer - accepts the current node and the path to that node.
105
+ */
106
+ public static void traverseJsonSchema (final JsonNode jsonSchema , final BiConsumer <JsonNode , List <FieldNameOrList >> consumer ) {
107
+ traverseJsonSchemaInternal (jsonSchema , new ArrayList <>(), consumer );
108
+ }
109
+
110
+ /**
111
+ * Traverse a JsonSchema object. At each node, map a value.
112
+ *
113
+ * @param jsonSchema - JsonSchema object to traverse
114
+ * @param mapper - accepts the current node and the path to that node. whatever is returned will be
115
+ * collected and returned by the final collection.
116
+ * @param <T> - type of objects being collected
117
+ * @return - collection of all items that were collected during the traversal. Returns a { @link
118
+ * Collection } because there is no order or uniqueness guarantee so neither List nor Set
119
+ * make sense.
120
+ */
121
+ public static <T > List <T > traverseJsonSchemaWithCollector (final JsonNode jsonSchema ,
122
+ final BiFunction <JsonNode , List <FieldNameOrList >, T > mapper ) {
123
+ // for the sake of code reuse, use the filtered collector method but makes sure the filter always
124
+ // returns true.
125
+ return traverseJsonSchemaWithFilteredCollector (jsonSchema , (node , path ) -> Optional .ofNullable (mapper .apply (node , path )));
100
126
}
101
127
102
128
/**
@@ -111,44 +137,45 @@ public static void traverseJsonSchema(final JsonNode jsonSchemaNode, final BiCon
111
137
* Collection } because there is no order or uniqueness guarantee so neither List nor Set
112
138
* make sense.
113
139
*/
114
- public static <T > Collection <T > traverseJsonSchemaWithCollector (final JsonNode jsonSchema , final BiFunction <JsonNode , String , Optional <T >> mapper ) {
115
- final List <T > collectors = new ArrayList <>();
116
- traverseJsonSchema (jsonSchema , (node , path ) -> mapper .apply (node , path ).ifPresent (collectors ::add ));
117
- return collectors ;
140
+ public static <T > List <T > traverseJsonSchemaWithFilteredCollector (final JsonNode jsonSchema ,
141
+ final BiFunction <JsonNode , List <FieldNameOrList >, Optional <T >> mapper ) {
142
+ final List <T > collector = new ArrayList <>();
143
+ traverseJsonSchema (jsonSchema , (node , path ) -> mapper .apply (node , path ).ifPresent (collector ::add ));
144
+ return collector .stream ().toList (); // make list unmodifiable
118
145
}
119
146
120
147
/**
121
148
* Traverses a JsonSchema object. It returns the path to each node that meet the provided condition.
122
- * The paths are return in JsonPath format
149
+ * The paths are return in JsonPath format. The traversal is depth-first search preoorder and values
150
+ * are returned in that order.
123
151
*
124
152
* @param obj - JsonSchema object to traverse
125
153
* @param predicate - predicate to determine if the path for a node should be collected.
126
154
* @return - collection of all paths that were collected during the traversal.
127
155
*/
128
- public static Set < String > collectJsonPathsThatMeetCondition (final JsonNode obj , final Predicate <JsonNode > predicate ) {
129
- return new HashSet <>( traverseJsonSchemaWithCollector (obj , (node , path ) -> {
156
+ public static List < List < FieldNameOrList >> collectPathsThatMeetCondition (final JsonNode obj , final Predicate <JsonNode > predicate ) {
157
+ return traverseJsonSchemaWithFilteredCollector (obj , (node , path ) -> {
130
158
if (predicate .test (node )) {
131
159
return Optional .of (path );
132
160
} else {
133
161
return Optional .empty ();
134
162
}
135
- })) ;
163
+ });
136
164
}
137
165
138
166
/**
139
167
* Recursive, depth-first implementation of { @link JsonSchemas#traverseJsonSchema(final JsonNode
140
168
* jsonNode, final BiConsumer<JsonNode, List<String>> consumer) }. Takes path as argument so that
141
- * the path can be passsed to the consumer.
169
+ * the path can be passed to the consumer.
142
170
*
143
171
* @param jsonSchemaNode - jsonschema object to traverse.
144
- * @param path - path from the first call of traverseJsonSchema to the current node.
145
172
* @param consumer - consumer to be called at each node. it accepts the current node and the path to
146
173
* the node from the root of the object passed at the root level invocation
174
+ *
147
175
*/
148
- // todo (cgardens) - replace with easier to understand traversal logic from SecretsHelper.
149
176
private static void traverseJsonSchemaInternal (final JsonNode jsonSchemaNode ,
150
- final String path ,
151
- final BiConsumer <JsonNode , String > consumer ) {
177
+ final List < FieldNameOrList > path ,
178
+ final BiConsumer <JsonNode , List < FieldNameOrList > > consumer ) {
152
179
if (!jsonSchemaNode .isObject ()) {
153
180
throw new IllegalArgumentException (String .format ("json schema nodes should always be object nodes. path: %s actual: %s" , path , jsonSchemaNode ));
154
181
}
@@ -162,23 +189,20 @@ private static void traverseJsonSchemaInternal(final JsonNode jsonSchemaNode,
162
189
switch (nodeType ) {
163
190
// case BOOLEAN_TYPE, NUMBER_TYPE, STRING_TYPE, NULL_TYPE -> do nothing after consumer.accept above.
164
191
case ARRAY_TYPE -> {
165
- final String newPath = JsonPaths . appendAppendListSplat (path );
192
+ final List < FieldNameOrList > newPath = MoreLists . add (path , FieldNameOrList . list () );
166
193
// hit every node.
167
- // log.error("array: " + jsonSchemaNode);
168
194
traverseJsonSchemaInternal (jsonSchemaNode .get (JSON_SCHEMA_ITEMS_KEY ), newPath , consumer );
169
195
}
170
196
case OBJECT_TYPE -> {
171
197
final Optional <String > comboKeyWordOptional = getKeywordIfComposite (jsonSchemaNode );
172
198
if (jsonSchemaNode .has (JSON_SCHEMA_PROPERTIES_KEY )) {
173
199
for (final Iterator <Entry <String , JsonNode >> it = jsonSchemaNode .get (JSON_SCHEMA_PROPERTIES_KEY ).fields (); it .hasNext ();) {
174
200
final Entry <String , JsonNode > child = it .next ();
175
- final String newPath = JsonPaths .appendField (path , child .getKey ());
176
- // log.error("obj1: " + jsonSchemaNode);
201
+ final List <FieldNameOrList > newPath = MoreLists .add (path , FieldNameOrList .fieldName (child .getKey ()));
177
202
traverseJsonSchemaInternal (child .getValue (), newPath , consumer );
178
203
}
179
204
} else if (comboKeyWordOptional .isPresent ()) {
180
205
for (final JsonNode arrayItem : jsonSchemaNode .get (comboKeyWordOptional .get ())) {
181
- // log.error("obj2: " + jsonSchemaNode);
182
206
traverseJsonSchemaInternal (arrayItem , path , consumer );
183
207
}
184
208
} else {
@@ -206,30 +230,112 @@ private static Optional<String> getKeywordIfComposite(final JsonNode node) {
206
230
return Optional .empty ();
207
231
}
208
232
209
- public static List <String > getTypeOrObject (final JsonNode jsonNode ) {
210
- final List <String > types = getType (jsonNode );
233
+ /**
234
+ * Same logic as {@link #getType(JsonNode)} except when no type is found, it defaults to type:
235
+ * Object.
236
+ *
237
+ * @param jsonSchema - JSONSchema object
238
+ * @return type of the node.
239
+ */
240
+ public static List <String > getTypeOrObject (final JsonNode jsonSchema ) {
241
+ final List <String > types = getType (jsonSchema );
211
242
if (types .isEmpty ()) {
212
243
return List .of (OBJECT_TYPE );
213
244
} else {
214
245
return types ;
215
246
}
216
247
}
217
248
218
- public static List <String > getType (final JsonNode jsonNode ) {
219
- if (jsonNode .has (JSON_SCHEMA_TYPE_KEY )) {
220
- if (jsonNode .get (JSON_SCHEMA_TYPE_KEY ).isArray ()) {
221
- return MoreIterators .toList (jsonNode .get (JSON_SCHEMA_TYPE_KEY ).iterator ())
249
+ /**
250
+ * Get the type of JSONSchema node. Uses JSONSchema types. Only returns the type of the "top-level"
251
+ * node. e.g. if more nodes are nested underneath because it is an object or an array, only the top
252
+ * level type is returned.
253
+ *
254
+ * @param jsonSchema - JSONSchema object
255
+ * @return type of the node.
256
+ */
257
+ public static List <String > getType (final JsonNode jsonSchema ) {
258
+ if (jsonSchema .has (JSON_SCHEMA_TYPE_KEY )) {
259
+ if (jsonSchema .get (JSON_SCHEMA_TYPE_KEY ).isArray ()) {
260
+ return MoreIterators .toList (jsonSchema .get (JSON_SCHEMA_TYPE_KEY ).iterator ())
222
261
.stream ()
223
262
.map (JsonNode ::asText )
224
263
.collect (Collectors .toList ());
225
264
} else {
226
- return List .of (jsonNode .get (JSON_SCHEMA_TYPE_KEY ).asText ());
265
+ return List .of (jsonSchema .get (JSON_SCHEMA_TYPE_KEY ).asText ());
227
266
}
228
267
}
229
- if (jsonNode .has (JSON_SCHEMA_ENUM_KEY )) {
268
+ if (jsonSchema .has (JSON_SCHEMA_ENUM_KEY )) {
230
269
return List .of (STRING_TYPE );
231
270
}
232
271
return Collections .emptyList ();
233
272
}
234
273
274
+ /**
275
+ * Provides a basic scheme for describing the path into a JSON object. Each element in the path is
276
+ * either a field name or a list.
277
+ *
278
+ * This class is helpful in the case where fields can be any UTF-8 string, so the only simple way to
279
+ * keep track of the different parts of a path without going crazy with escape characters is to keep
280
+ * it in a list with list set aside as a special case.
281
+ *
282
+ * We prefer using this scheme instead of JSONPath in the tree traversal because, it is easier to
283
+ * decompose a path in this scheme than it is in JSONPath. Some callers of the traversal logic want
284
+ * to isolate parts of the path easily without the need for complex regex (that would be required if
285
+ * we used JSONPath).
286
+ */
287
+ public static class FieldNameOrList {
288
+
289
+ private final String fieldName ;
290
+ private final boolean isList ;
291
+
292
+ public static FieldNameOrList fieldName (final String fieldName ) {
293
+ return new FieldNameOrList (fieldName );
294
+ }
295
+
296
+ public static FieldNameOrList list () {
297
+ return new FieldNameOrList (null );
298
+ }
299
+
300
+ private FieldNameOrList (final String fieldName ) {
301
+ isList = fieldName == null ;
302
+ this .fieldName = fieldName ;
303
+ }
304
+
305
+ public String getFieldName () {
306
+ Preconditions .checkState (!isList , "cannot return field name, is list node" );
307
+ return fieldName ;
308
+ }
309
+
310
+ public boolean isList () {
311
+ return isList ;
312
+ }
313
+
314
+ @ Override
315
+ public boolean equals (final Object o ) {
316
+ if (this == o ) {
317
+ return true ;
318
+ }
319
+ if (!(o instanceof FieldNameOrList )) {
320
+ return false ;
321
+ }
322
+ final FieldNameOrList that = (FieldNameOrList ) o ;
323
+ return isList == that .isList && Objects .equals (fieldName , that .fieldName );
324
+ }
325
+
326
+ @ Override
327
+ public int hashCode () {
328
+ return Objects .hash (fieldName , isList );
329
+ }
330
+
331
+ @ Override
332
+ public String toString () {
333
+ return "FieldNameOrList{" +
334
+ "fieldName='" + fieldName + '\'' +
335
+ ", isList=" + isList +
336
+ '}' ;
337
+ }
338
+
339
+ }
340
+
235
341
}
0 commit comments