Skip to content

Commit 07e2232

Browse files
authored
Track const config values in analytics (#10120)
1 parent a66d8be commit 07e2232

File tree

5 files changed

+316
-40
lines changed

5 files changed

+316
-40
lines changed

airbyte-commons/src/main/java/io/airbyte/commons/json/Jsons.java

+59-2
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44

55
package io.airbyte.commons.json;
66

7+
import static java.util.Collections.singletonMap;
8+
import static java.util.stream.Collectors.toMap;
9+
710
import com.fasterxml.jackson.core.JsonProcessingException;
811
import com.fasterxml.jackson.core.type.TypeReference;
912
import com.fasterxml.jackson.core.util.DefaultPrettyPrinter;
@@ -19,9 +22,12 @@
1922
import java.io.IOException;
2023
import java.util.Arrays;
2124
import java.util.Collections;
25+
import java.util.HashMap;
2226
import java.util.HashSet;
27+
import java.util.Iterator;
2328
import java.util.List;
2429
import java.util.Map;
30+
import java.util.Map.Entry;
2531
import java.util.Optional;
2632
import java.util.Set;
2733
import java.util.function.BiConsumer;
@@ -198,8 +204,59 @@ public static int getIntOrZero(final JsonNode json, final List<String> keys) {
198204
}
199205

200206
/**
201-
* By the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" :
202-
* "airbyte"}. We prefer {"name": "airbyte"}.
207+
* Flattens an ObjectNode, or dumps it into a {null: value} map if it's not an object.
208+
*/
209+
public static Map<String, Object> flatten(final JsonNode node) {
210+
if (node.isObject()) {
211+
final Map<String, Object> output = new HashMap<>();
212+
for (final Iterator<Entry<String, JsonNode>> it = node.fields(); it.hasNext(); ) {
213+
final Entry<String, JsonNode> entry = it.next();
214+
final String field = entry.getKey();
215+
final JsonNode value = entry.getValue();
216+
mergeMaps(output, field, flatten(value));
217+
}
218+
return output;
219+
} else {
220+
final Object value;
221+
if (node.isBoolean()) {
222+
value = node.asBoolean();
223+
} else if (node.isLong()) {
224+
value = node.asLong();
225+
} else if (node.isInt()) {
226+
value = node.asInt();
227+
} else if (node.isDouble()) {
228+
value = node.asDouble();
229+
} else if (node.isValueNode() && !node.isNull()) {
230+
value = node.asText();
231+
} else {
232+
// Fallback handling for e.g. arrays
233+
value = node.toString();
234+
}
235+
return singletonMap(null, value);
236+
}
237+
}
238+
239+
/**
240+
* Prepend all keys in subMap with prefix, then merge that map into originalMap.
241+
* <p>
242+
* If subMap contains a null key, then instead it is replaced with prefix. I.e. {null: value} is treated as {prefix: value} when merging into
243+
* originalMap.
244+
*/
245+
public static void mergeMaps(final Map<String, Object> originalMap, final String prefix, final Map<String, Object> subMap) {
246+
originalMap.putAll(subMap.entrySet().stream().collect(toMap(
247+
e -> {
248+
final String key = e.getKey();
249+
if (key != null) {
250+
return prefix + "." + key;
251+
} else {
252+
return prefix;
253+
}
254+
},
255+
Entry::getValue)));
256+
}
257+
258+
/**
259+
* By default, the Jackson DefaultPrettyPrinter prints objects with an extra space as follows: {"name" : "airbyte"}. We prefer {"name": "airbyte"}.
203260
*/
204261
private static class JsonPrettyPrinter extends DefaultPrettyPrinter {
205262

airbyte-scheduler/persistence/src/main/java/io/airbyte/scheduler/persistence/job_tracker/JobTracker.java

+106-28
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,17 @@
44

55
package io.airbyte.scheduler.persistence.job_tracker;
66

7+
import static java.util.Collections.emptyMap;
8+
import static java.util.Collections.singletonMap;
9+
710
import com.fasterxml.jackson.databind.JsonNode;
8-
import com.fasterxml.jackson.databind.node.ObjectNode;
11+
import com.fasterxml.jackson.databind.ObjectMapper;
912
import com.google.common.annotations.VisibleForTesting;
1013
import com.google.common.base.Preconditions;
1114
import com.google.common.collect.ImmutableMap;
1215
import com.google.common.collect.ImmutableMap.Builder;
1316
import io.airbyte.analytics.TrackingClient;
17+
import io.airbyte.commons.json.Jsons;
1418
import io.airbyte.commons.lang.Exceptions;
1519
import io.airbyte.commons.map.MoreMaps;
1620
import io.airbyte.config.JobConfig;
@@ -28,12 +32,13 @@
2832
import io.airbyte.scheduler.models.Job;
2933
import io.airbyte.scheduler.persistence.JobPersistence;
3034
import io.airbyte.scheduler.persistence.WorkspaceHelper;
35+
import io.airbyte.validation.json.JsonSchemaValidator;
3136
import io.airbyte.validation.json.JsonValidationException;
3237
import java.io.IOException;
33-
import java.util.Collections;
3438
import java.util.HashMap;
3539
import java.util.Iterator;
3640
import java.util.Map;
41+
import java.util.Map.Entry;
3742
import java.util.UUID;
3843

3944
public class JobTracker {
@@ -50,6 +55,8 @@ public enum JobState {
5055
public static final String OPERATION = "operation.";
5156
public static final String SET = "set";
5257

58+
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
59+
5360
private final ConfigRepository configRepository;
5461
private final JobPersistence jobPersistence;
5562
private final WorkspaceHelper workspaceHelper;
@@ -118,16 +125,21 @@ public void trackSync(final Job job, final JobState jobState) {
118125
Preconditions.checkArgument(allowedJob, "Job type " + configType + " is not allowed!");
119126
final long jobId = job.getId();
120127
final UUID connectionId = UUID.fromString(job.getScope());
121-
final UUID sourceDefinitionId = configRepository.getSourceDefinitionFromConnection(connectionId).getSourceDefinitionId();
122-
final UUID destinationDefinitionId = configRepository.getDestinationDefinitionFromConnection(connectionId).getDestinationDefinitionId();
128+
final StandardSourceDefinition sourceDefinition = configRepository.getSourceDefinitionFromConnection(connectionId);
129+
final UUID sourceDefinitionId = sourceDefinition.getSourceDefinitionId();
130+
final StandardDestinationDefinition destinationDefinition = configRepository.getDestinationDefinitionFromConnection(connectionId);
131+
final UUID destinationDefinitionId = destinationDefinition.getDestinationDefinitionId();
123132

124133
final Map<String, Object> jobMetadata = generateJobMetadata(String.valueOf(jobId), configType, job.getAttemptsCount());
125134
final Map<String, Object> jobAttemptMetadata = generateJobAttemptMetadata(job.getId(), jobState);
126135
final Map<String, Object> sourceDefMetadata = generateSourceDefinitionMetadata(sourceDefinitionId);
127136
final Map<String, Object> destinationDefMetadata = generateDestinationDefinitionMetadata(destinationDefinitionId);
128137
final Map<String, Object> syncMetadata = generateSyncMetadata(connectionId);
129138
final Map<String, Object> stateMetadata = generateStateMetadata(jobState);
130-
final Map<String, Object> syncConfigMetadata = generateSyncConfigMetadata(job.getConfig());
139+
final Map<String, Object> syncConfigMetadata = generateSyncConfigMetadata(
140+
job.getConfig(),
141+
sourceDefinition.getSpec().getConnectionSpecification(),
142+
destinationDefinition.getSpec().getConnectionSpecification());
131143

132144
final UUID workspaceId = workspaceHelper.getWorkspaceForJobIdIgnoreExceptions(jobId);
133145
track(workspaceId,
@@ -142,18 +154,20 @@ public void trackSync(final Job job, final JobState jobState) {
142154
});
143155
}
144156

145-
private Map<String, Object> generateSyncConfigMetadata(final JobConfig config) {
157+
private Map<String, Object> generateSyncConfigMetadata(final JobConfig config,
158+
final JsonNode sourceConfigSchema,
159+
final JsonNode destinationConfigSchema) {
146160
if (config.getConfigType() == ConfigType.SYNC) {
147161
final JsonNode sourceConfiguration = config.getSync().getSourceConfiguration();
148162
final JsonNode destinationConfiguration = config.getSync().getDestinationConfiguration();
149163

150-
final Map<String, Object> sourceMetadata = configToMetadata(CONFIG + ".source", sourceConfiguration);
151-
final Map<String, Object> destinationMetadata = configToMetadata(CONFIG + ".destination", destinationConfiguration);
164+
final Map<String, Object> sourceMetadata = configToMetadata(CONFIG + ".source", sourceConfiguration, sourceConfigSchema);
165+
final Map<String, Object> destinationMetadata = configToMetadata(CONFIG + ".destination", destinationConfiguration, destinationConfigSchema);
152166
final Map<String, Object> catalogMetadata = getCatalogMetadata(config.getSync().getConfiguredAirbyteCatalog());
153167

154168
return MoreMaps.merge(sourceMetadata, destinationMetadata, catalogMetadata);
155169
} else {
156-
return Collections.emptyMap();
170+
return emptyMap();
157171
}
158172
}
159173

@@ -168,30 +182,94 @@ private Map<String, Object> getCatalogMetadata(final ConfiguredAirbyteCatalog ca
168182
return output;
169183
}
170184

171-
protected static Map<String, Object> configToMetadata(final String jsonPath, final JsonNode config) {
185+
/**
186+
* Flattens a config into a map. Uses the schema to determine which fields are const (i.e.
187+
* non-sensitive). Non-const, non-boolean values are replaced with {@link #SET} to avoid leaking
188+
* potentially-sensitive information.
189+
* <p>
190+
* anyOf/allOf schemas are treated as non-const values. These aren't (currently) used in config
191+
* schemas anyway.
192+
*
193+
* @param jsonPath A prefix to add to all the keys in the returned map, with a period (`.`)
194+
* separator
195+
* @param schema The JSON schema that {@code config} conforms to
196+
*/
197+
protected static Map<String, Object> configToMetadata(final String jsonPath, final JsonNode config, final JsonNode schema) {
198+
final Map<String, Object> metadata = configToMetadata(config, schema);
199+
// Prepend all the keys with the root jsonPath
200+
// But leave the values unchanged
172201
final Map<String, Object> output = new HashMap<>();
202+
Jsons.mergeMaps(output, jsonPath, metadata);
203+
return output;
204+
}
205+
206+
/**
207+
* Does the actually interesting bits of configToMetadata. If config is an object, returns a
208+
* flattened map. If config is _not_ an object (i.e. it's a primitive string/number/etc, or it's an
209+
* array) then returns a map of {null: toMetadataValue(config)}.
210+
*/
211+
private static Map<String, Object> configToMetadata(final JsonNode config, final JsonNode schema) {
212+
if (schema.hasNonNull("const")) {
213+
// If this schema is a const, then just dump it into a map:
214+
// * If it's an object, flatten it
215+
// * Otherwise, do some basic conversions to value-ish data.
216+
// It would be a weird thing to declare const: null, but in that case we don't want to report null
217+
// anyway, so explicitly use hasNonNull.
218+
return Jsons.flatten(config);
219+
} else if (schema.has("oneOf")) {
220+
// If this schema is a oneOf, then find the first sub-schema which the config matches
221+
// and use that sub-schema to convert the config to a map
222+
final JsonSchemaValidator validator = new JsonSchemaValidator();
223+
for (final Iterator<JsonNode> it = schema.get("oneOf").elements(); it.hasNext();) {
224+
final JsonNode subSchema = it.next();
225+
if (validator.test(subSchema, config)) {
226+
return configToMetadata(config, subSchema);
227+
}
228+
}
229+
// If we didn't match any of the subschemas, then something is wrong. Bail out silently.
230+
return emptyMap();
231+
} else if (config.isObject()) {
232+
// If the schema is not a oneOf, but the config is an object (i.e. the schema has "type": "object")
233+
// then we need to recursively convert each field of the object to a map.
234+
final Map<String, Object> output = new HashMap<>();
235+
final JsonNode maybeProperties = schema.get("properties");
236+
237+
// If additionalProperties is not set, or it's a boolean, then there's no schema for additional properties. Use the accept-all schema.
238+
// Otherwise, it's an actual schema.
239+
final JsonNode maybeAdditionalProperties = schema.get("additionalProperties");
240+
final JsonNode additionalPropertiesSchema;
241+
if (maybeAdditionalProperties == null || maybeAdditionalProperties.isBoolean()) {
242+
additionalPropertiesSchema = OBJECT_MAPPER.createObjectNode();
243+
} else {
244+
additionalPropertiesSchema = maybeAdditionalProperties;
245+
}
173246

174-
if (config.isObject()) {
175-
final ObjectNode node = (ObjectNode) config;
176-
for (final Iterator<Map.Entry<String, JsonNode>> it = node.fields(); it.hasNext();) {
177-
final var entry = it.next();
178-
final var field = entry.getKey();
179-
final var fieldJsonPath = jsonPath + "." + field;
180-
final var child = entry.getValue();
181-
182-
if (child.isBoolean()) {
183-
output.put(fieldJsonPath, child.asBoolean());
184-
} else if (!child.isNull()) {
185-
if (child.isObject()) {
186-
output.putAll(configToMetadata(fieldJsonPath, child));
187-
} else if (!child.isTextual() || (child.isTextual() && !child.asText().isEmpty())) {
188-
output.put(fieldJsonPath, SET);
189-
}
247+
for (final Iterator<Entry<String, JsonNode>> it = config.fields(); it.hasNext(); ) {
248+
final Entry<String, JsonNode> entry = it.next();
249+
final String field = entry.getKey();
250+
final JsonNode value = entry.getValue();
251+
252+
final JsonNode propertySchema;
253+
if (maybeProperties != null && maybeProperties.hasNonNull(field)) {
254+
// If this property is explicitly declared, then use its schema
255+
propertySchema = maybeProperties.get(field);
256+
} else {
257+
// otherwise, use the additionalProperties schema
258+
propertySchema = additionalPropertiesSchema;
190259
}
260+
261+
Jsons.mergeMaps(output, field, configToMetadata(value, propertySchema));
191262
}
263+
return output;
264+
} else if (config.isBoolean()) {
265+
return singletonMap(null, config.asBoolean());
266+
} else if ((!config.isTextual() && !config.isNull()) || (config.isTextual() && !config.asText().isEmpty())) {
267+
// This is either non-textual (e.g. integer, array, etc) or non-empty text
268+
return singletonMap(null, SET);
269+
} else {
270+
// Otherwise, this is an empty string, so just ignore it
271+
return emptyMap();
192272
}
193-
194-
return output;
195273
}
196274

197275
private Map<String, Object> generateSyncMetadata(final UUID connectionId) throws ConfigNotFoundException, IOException, JsonValidationException {

airbyte-scheduler/persistence/src/main/resources/example_config.json

+21
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@
55
"empty_string": "",
66
"null_value": null,
77
"one_of": {
8+
"type_key": "foo",
89
"some_key": 100
10+
},
11+
"const_object": {
12+
"sub_key": "bar",
13+
"sub_array": [1, 2, 3],
14+
"sub_object": {
15+
"sub_sub_key": "baz"
16+
}
17+
},
18+
"const_null": null,
19+
"additionalPropertiesUnset": {
20+
"foo": "bar"
21+
},
22+
"additionalPropertiesBoolean": {
23+
"foo": "bar"
24+
},
25+
"additionalPropertiesSchema": {
26+
"foo": 42
27+
},
28+
"additionalPropertiesConst": {
29+
"foo": 42
930
}
1031
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"type": "object",
3+
"properties": {
4+
"username": {
5+
"type": "string"
6+
},
7+
"password": {
8+
"type": "string"
9+
},
10+
"has_ssl": {
11+
"type": "boolean"
12+
},
13+
"empty_string": {
14+
"type": "string"
15+
},
16+
"null_value": {
17+
"type": "null"
18+
},
19+
"one_of": {
20+
"type": "object",
21+
"oneOf": [
22+
{
23+
"type": "object",
24+
"properties": {
25+
"type_key": {
26+
"const": "foo"
27+
},
28+
"some_key": {
29+
"type": "integer"
30+
}
31+
}
32+
}
33+
]
34+
},
35+
"const_object": {
36+
"const": {
37+
"sub_key": "bar",
38+
"sub_array": [1, 2, 3],
39+
"sub_object": {
40+
"sub_sub_key": "baz"
41+
}
42+
}
43+
},
44+
"const_null": {
45+
"const": null
46+
},
47+
"additionalPropertiesUnset": {
48+
"type": "object"
49+
},
50+
"additionalPropertiesBoolean": {
51+
"type": "object",
52+
"additionalProperties": true
53+
},
54+
"additionalPropertiesSchema": {
55+
"type": "object",
56+
"additionalProperties": {
57+
"type": "integer"
58+
}
59+
},
60+
"additionalPropertiesConst": {
61+
"type": "object",
62+
"additionalProperties": {
63+
"const": 42
64+
}
65+
}
66+
}
67+
}

0 commit comments

Comments
 (0)