You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
Copy file name to clipboard. Expand all lines: airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/AbstractStreamOperation.kt
+106-14
Original file line number
Diff line number
Diff line change
@@ -54,9 +54,9 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
"${stream.id.originalNamespace}.${stream.id.originalName}: truncate sync, and existing temp raw table belongs to generation $tempStageGeneration (!= current generation ${stream.generationId}). Truncating it."
@@ -156,18 +167,67 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
156
167
TMP_TABLE_SUFFIX,
157
168
replace = true,
158
169
)
170
+
// We nuked the temp raw table, so create a new initial raw table status.
// It's possible to "resume" a truncate sync that was previously already finalized.
182
+
// In this case, there is no existing temp raw table, and there is a real raw table
183
+
// which already belongs to the correct generation.
184
+
// Check for that case now.
185
+
val realStageGeneration = storageOperation.getStageGeneration(stream.id, NO_SUFFIX)
186
+
if (realStageGeneration == null || realStageGeneration == stream.generationId) {
187
+
log.info {
188
+
"${stream.id.originalNamespace}.${stream.id.originalName}: truncate sync, no existing temp raw table, and existing real raw table belongs to generation $realStageGeneration (== current generation ${stream.generationId}). Retaining it."
189
+
}
190
+
// The real raw table is from the correct generation. Set up any other resources
191
+
// (staging file, etc.), but leave the table untouched.
"${stream.id.originalNamespace}.${stream.id.originalName}: truncate sync, existing real raw table belongs to generation $realStageGeneration (!= current generation ${stream.generationId}), and no preexisting temp raw table. Creating a temp raw table."
197
+
}
198
+
// We're initiating a new truncate refresh. Create a new temp stage.
199
+
storageOperation.prepareStage(
200
+
stream.id,
201
+
TMP_TABLE_SUFFIX,
202
+
)
203
+
return Pair(
204
+
// Create a fresh raw table status, since we created a fresh temp stage.
205
+
InitialRawTableStatus(
206
+
rawTableExists = true,
207
+
hasUnprocessedRecords = false,
208
+
maxProcessedTimestamp = Optional.empty(),
209
+
),
210
+
TMP_TABLE_SUFFIX,
211
+
)
159
212
}
160
-
// (if the existing temp stage is from the correct generation, then we're resuming
161
-
// a truncate refresh, and should keep the previous temp stage).
162
213
} else {
163
214
log.info {
164
-
"${stream.id.originalNamespace}.${stream.id.originalName}: truncate sync, and no preexisting temp raw table. Creating it."
215
+
"${stream.id.originalNamespace}.${stream.id.originalName}: truncate sync, and no preexisting temp or raw table. Creating a temp raw table."
165
216
}
166
217
// We're initiating a new truncate refresh. Create a new temp stage.
167
218
storageOperation.prepareStage(
168
219
stream.id,
169
220
TMP_TABLE_SUFFIX,
170
221
)
222
+
return Pair(
223
+
// Create a fresh raw table status, since we created a fresh temp stage.
224
+
InitialRawTableStatus(
225
+
rawTableExists = true,
226
+
hasUnprocessedRecords = false,
227
+
maxProcessedTimestamp = Optional.empty(),
228
+
),
229
+
TMP_TABLE_SUFFIX,
230
+
)
171
231
}
172
232
}
173
233
@@ -188,8 +248,39 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
188
248
// The table already exists. Decide whether we're writing to it directly, or
if (initialStatus.isSchemaMismatch || initialStatus.destinationState.needsSoftReset()) {
195
286
// We're loading data directly into the existing table.
@@ -257,14 +348,14 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
257
348
// which is possible (`typeAndDedupe(streamConfig.id.copy(rawName = streamConfig.id.rawName
258
349
// + suffix))`
259
350
// but annoying and confusing.
260
-
if (isTruncateSync && streamSuccessful) {
351
+
if (isTruncateSync && streamSuccessful && rawTableSuffix.isNotEmpty()) {
261
352
log.info {
262
-
"Overwriting raw table for ${streamConfig.id.originalNamespace}.${streamConfig.id.originalName} because this is a truncate sync and we received a stream success message."
353
+
"Overwriting raw table for ${streamConfig.id.originalNamespace}.${streamConfig.id.originalName} because this is a truncate sync, we received a stream success message, and are using a temporary raw table."
"Not overwriting raw table for ${streamConfig.id.originalNamespace}.${streamConfig.id.originalName}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful"
358
+
"Not overwriting raw table for ${streamConfig.id.originalNamespace}.${streamConfig.id.originalName}. Truncate sync: $isTruncateSync; stream success: $streamSuccessful; raw table suffix: \"$rawTableSuffix\""
268
359
}
269
360
}
270
361
@@ -303,10 +394,11 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
303
394
"Skipping typing and deduping for stream ${streamConfig.id.originalNamespace}.${streamConfig.id.originalName} running as truncate sync. Stream success: $streamSuccessful; records written: ${syncSummary.recordsWritten}; temp raw table already existed: ${initialRawTableStatus.rawTableExists}; temp raw table had records: ${initialRawTableStatus.hasUnprocessedRecords}"
304
395
}
305
396
} else {
306
-
// In truncate mode, we want to read all the raw records. Typically, this is equivalent
397
+
// When targeting the temp final table, we want to read all the raw records
398
+
// because the temp final table is always a full rebuild. Typically, this is equivalent
307
399
// to filtering on timestamp, but might as well be explicit.
0 commit comments