@@ -8,7 +8,7 @@ import io.airbyte.cdk.integrations.base.AirbyteExceptionHandler.Companion.addStr
8
8
import io.airbyte.cdk.integrations.base.JavaBaseConstants
9
9
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog
10
10
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream
11
- import java.util.*
11
+ import java.util.Optional
12
12
import java.util.function.Consumer
13
13
import org.apache.commons.codec.digest.DigestUtils
14
14
import org.slf4j.Logger
@@ -50,31 +50,31 @@ constructor(
50
50
// We're taking a hash of the quoted namespace and the unquoted stream name
51
51
val hash =
52
52
DigestUtils .sha1Hex(
53
- originalStreamConfig.id!! .finalNamespace + " &airbyte&" + originalName
53
+ " ${ originalStreamConfig.id.finalNamespace} &airbyte&$ originalName"
54
54
)
55
55
.substring(0 , 3 )
56
- val newName = originalName + " _ " + hash
56
+ val newName = " ${originalName} _ $ hash"
57
57
actualStreamConfig =
58
58
StreamConfig (
59
59
sqlGenerator.buildStreamId(originalNamespace, newName, rawNamespace),
60
60
originalStreamConfig.syncMode,
61
61
originalStreamConfig.destinationSyncMode,
62
62
originalStreamConfig.primaryKey,
63
63
originalStreamConfig.cursor,
64
- originalStreamConfig.columns
64
+ originalStreamConfig.columns,
65
65
)
66
66
} else {
67
67
actualStreamConfig = originalStreamConfig
68
68
}
69
69
streamConfigs.add(actualStreamConfig)
70
70
71
71
// Populate some interesting strings into the exception handler string deinterpolator
72
- addStringForDeinterpolation(actualStreamConfig.id!! .rawNamespace)
73
- addStringForDeinterpolation(actualStreamConfig.id!! .rawName)
74
- addStringForDeinterpolation(actualStreamConfig.id!! .finalNamespace)
75
- addStringForDeinterpolation(actualStreamConfig.id!! .finalName)
76
- addStringForDeinterpolation(actualStreamConfig.id!! .originalNamespace)
77
- addStringForDeinterpolation(actualStreamConfig.id!! .originalName)
72
+ addStringForDeinterpolation(actualStreamConfig.id.rawNamespace)
73
+ addStringForDeinterpolation(actualStreamConfig.id.rawName)
74
+ addStringForDeinterpolation(actualStreamConfig.id.finalNamespace)
75
+ addStringForDeinterpolation(actualStreamConfig.id.finalName)
76
+ addStringForDeinterpolation(actualStreamConfig.id.originalNamespace)
77
+ addStringForDeinterpolation(actualStreamConfig.id.originalName)
78
78
actualStreamConfig.columns!!
79
79
.keys
80
80
.forEach(
@@ -101,19 +101,14 @@ constructor(
101
101
return ParsedCatalog (streamConfigs)
102
102
}
103
103
104
- // TODO maybe we should extract the column collision stuff to a separate method, since that's
105
- // the
106
- // interesting bit
107
104
@VisibleForTesting
108
105
fun toStreamConfig (stream : ConfiguredAirbyteStream ): StreamConfig {
109
106
val schema: AirbyteType = AirbyteType .Companion .fromJsonSchema(stream.stream.jsonSchema)
110
107
val airbyteColumns =
111
- if (schema is Struct ) {
112
- schema.properties
113
- } else if (schema is Union ) {
114
- schema.asColumns()
115
- } else {
116
- throw IllegalArgumentException (" Top-level schema must be an object" )
108
+ when (schema) {
109
+ is Struct -> schema.properties
110
+ is Union -> schema.asColumns()
111
+ else -> throw IllegalArgumentException (" Top-level schema must be an object" )
117
112
}
118
113
119
114
require(! stream.primaryKey.stream().anyMatch { key: List <String ?> -> key.size > 1 }) {
@@ -126,21 +121,38 @@ constructor(
126
121
.toList()
127
122
128
123
require(stream.cursorField.size <= 1 ) { " Only top-level cursors are supported" }
129
- val cursor: Optional <ColumnId >
130
- if (stream.cursorField.size > 0 ) {
131
- cursor = Optional .of(sqlGenerator.buildColumnId(stream.cursorField[0 ])!! )
132
- } else {
133
- cursor = Optional .empty()
134
- }
124
+ val cursor: Optional <ColumnId > =
125
+ if (stream.cursorField.isNotEmpty()) {
126
+ Optional .of(sqlGenerator.buildColumnId(stream.cursorField[0 ]))
127
+ } else {
128
+ Optional .empty()
129
+ }
130
+
131
+ val columns = resolveColumnCollisions(airbyteColumns, stream)
132
+
133
+ return StreamConfig (
134
+ sqlGenerator.buildStreamId(stream.stream.namespace, stream.stream.name, rawNamespace),
135
+ stream.syncMode,
136
+ stream.destinationSyncMode,
137
+ primaryKey,
138
+ cursor,
139
+ columns
140
+ )
141
+ }
135
142
136
- // this code is really bad and I'm not convinced we need to preserve this behavior.
137
- // as with the tablename collisions thing above - we're trying to preserve legacy
138
- // normalization's
139
- // naming conventions here.
143
+ /* *
144
+ * This code is really bad and I'm not convinced we need to preserve this behavior. As with the
145
+ * tablename collisions thing above - we're trying to preserve legacy normalization's naming
146
+ * conventions here.
147
+ */
148
+ private fun resolveColumnCollisions (
149
+ airbyteColumns : LinkedHashMap <String , AirbyteType >,
150
+ stream : ConfiguredAirbyteStream
151
+ ): LinkedHashMap <ColumnId , AirbyteType > {
140
152
val columns = LinkedHashMap <ColumnId , AirbyteType >()
141
153
for ((key, value) in airbyteColumns) {
142
154
val originalColumnId = sqlGenerator.buildColumnId(key)
143
- var columnId: ColumnId ?
155
+ var columnId: ColumnId
144
156
if (
145
157
columns.keys.stream().noneMatch { c: ColumnId ->
146
158
c.canonicalName == originalColumnId.canonicalName
@@ -154,14 +166,31 @@ constructor(
154
166
" Detected column name collision for {}.{}.{}" ,
155
167
stream.stream.namespace,
156
168
stream.stream.name,
157
- key
169
+ key,
158
170
)
159
171
// One of the existing columns has the same name. We need to handle this collision.
160
172
// Append _1, _2, _3, ... to the column name until we find one that doesn't collide.
161
173
var i = 1
162
174
while (true ) {
163
175
columnId = sqlGenerator.buildColumnId(key, " _$i " )
164
- val canonicalName = columnId!! .canonicalName
176
+
177
+ // Verify that we're making progress, e.g. we haven't immediately truncated away
178
+ // the suffix.
179
+ if (columnId.canonicalName == originalColumnId.canonicalName) {
180
+ // If we're not making progress, do a more powerful mutation instead of
181
+ // appending numbers.
182
+ // Assume that we're being truncated, and that the column ID's name is the
183
+ // maximum length.
184
+ columnId =
185
+ superResolveColumnCollisions(
186
+ originalColumnId,
187
+ columns,
188
+ originalColumnId.name.length
189
+ )
190
+ break
191
+ }
192
+
193
+ val canonicalName = columnId.canonicalName
165
194
if (
166
195
columns.keys.stream().noneMatch { c: ColumnId ->
167
196
c.canonicalName == canonicalName
@@ -176,23 +205,64 @@ constructor(
176
205
// JSON records.
177
206
columnId =
178
207
ColumnId (
179
- columnId!! .name,
180
- originalColumnId!! .originalName,
181
- columnId.canonicalName
208
+ columnId.name,
209
+ originalColumnId.originalName,
210
+ columnId.canonicalName,
182
211
)
183
212
}
184
213
185
214
columns[columnId] = value
186
215
}
216
+ return columns
217
+ }
187
218
188
- return StreamConfig (
189
- sqlGenerator.buildStreamId(stream.stream.namespace, stream.stream.name, rawNamespace),
190
- stream.syncMode,
191
- stream.destinationSyncMode,
192
- primaryKey,
193
- cursor,
194
- columns
195
- )
219
/**
 * Last-resort collision resolver: abbreviates the column's original name to the format
 * `<prefix><length><suffix>`, where `<length>` is the number of characters dropped between the
 * two affixes. E.g. for an affix length of 3: "veryLongName" -> "ver6ame". This is based on the
 * "i18n"-ish naming convention.
 *
 * @param columnId The column that we're trying to add
 * @param columns The columns that we've already added
 * @param maximumColumnNameLength The name length to fit within. Five characters are reserved
 *   for the `<length>` portion and the remainder is split evenly between prefix and suffix.
 * @return The column ID that [sqlGenerator] builds from the abbreviated name
 * @throws IllegalArgumentException if there is no room for a non-empty prefix and suffix, or if
 *   the abbreviated name still collides with an existing column
 */
private fun superResolveColumnCollisions(
    columnId: ColumnId,
    columns: LinkedHashMap<ColumnId, AirbyteType>,
    maximumColumnNameLength: Int
): ColumnId {
    val originalColumnName = columnId.originalName
    // Both failure modes report the column name as it appears in the source schema, never the
    // abbreviated name generated below (which the user has never seen).
    val unsolvableMessage =
        "Cannot solve column name collision: ${originalColumnName}. " +
            "We recommend removing this column to continue syncing."

    // Assume that the <length> portion can be expressed in at most 5 characters.
    // If someone is giving us a column name that's longer than 99999 characters,
    // that's just being silly.
    val affixLength = (maximumColumnNameLength - 5) / 2
    // If, after reserving 5 characters for the length, we can't fit the affixes,
    // just give up. That means the destination is trying to restrict us to a
    // 6-character column name, which is just silly.
    require(affixLength > 0) { unsolvableMessage }

    val prefix = originalColumnName.substring(0, affixLength)
    val suffix = originalColumnName.substring(originalColumnName.length - affixLength)
    val length = originalColumnName.length - 2 * affixLength
    val newColumnId = sqlGenerator.buildColumnId("$prefix$length$suffix")

    // If there's _still_ a collision after this, just give up.
    // We could try to be more clever, but this is already a pretty rare case.
    require(columns.keys.none { it.canonicalName == newColumnId.canonicalName }) {
        unsolvableMessage
    }
    return newColumnId
}
197
267
198
268
companion object {
0 commit comments