Skip to content

Commit 8803c2e

Browse files
committed
add tests
1 parent 5232c81 commit 8803c2e

File tree

4 files changed

+317
-183
lines changed

4 files changed

+317
-183
lines changed

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/AbstractStreamOperation.kt

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -132,19 +132,17 @@ abstract class AbstractStreamOperation<DestinationState : MinimumDestinationStat
132132
} else {
133133
// In overwrite mode, we want to read all the raw records. Typically, this is equivalent
134134
// to filtering on timestamp, but might as well be explicit.
135-
val timestampFilter = if (isOverwriteSync) {
136-
initialRawTableStatus.maxProcessedTimestamp
137-
} else {
138-
Optional.empty()
139-
}
140-
storageOperations.typeAndDedupe(
141-
streamConfig,
142-
timestampFilter,
143-
finalTmpTableSuffix
144-
)
135+
val timestampFilter =
136+
if (isOverwriteSync) {
137+
initialRawTableStatus.maxProcessedTimestamp
138+
} else {
139+
Optional.empty()
140+
}
141+
storageOperations.typeAndDedupe(streamConfig, timestampFilter, finalTmpTableSuffix)
145142
}
146143

147-
// For overwrite, it's wasteful to do T+D, so we don't do soft-reset in prepare. Instead, we do
144+
// For overwrite, it's wasteful to do T+D, so we don't do soft-reset in prepare. Instead, we
145+
// do
148146
// type-dedupe
149147
// on a suffixed table and do a swap here when we have to for schema mismatches
150148
if (

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/main/kotlin/io/airbyte/integrations/base/destination/operation/DefaultSyncOperation.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,16 @@ class DefaultSyncOperation<DestinationState : MinimumDestinationState>(
4949
log.info { "Preparing required schemas and tables for all streams" }
5050
val streamsInitialStates = destinationHandler.gatherInitialState(parsedCatalog.streams)
5151

52-
// we will commit destinationStates and run Migrations here.
5352
val postMigrationInitialStates =
5453
tdutils.executeRawTableMigrations(
5554
executorService,
5655
destinationHandler,
5756
migrations,
5857
streamsInitialStates
5958
)
59+
destinationHandler.commitDestinationStates(
60+
postMigrationInitialStates.associate { it.streamConfig.id to it.destinationState }
61+
)
6062

6163
val initializationFutures =
6264
postMigrationInitialStates

airbyte-cdk/java/airbyte-cdk/typing-deduping/src/test/kotlin/io/airbyte/integrations/base/destination/operation/AbstractStreamOperationTest.kt

Lines changed: 164 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
/*
2+
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3+
*/
4+
15
package io.airbyte.integrations.base.destination.operation
26

37
import io.airbyte.cdk.integrations.destination.StreamSyncSummary
@@ -27,14 +31,27 @@ import org.junit.jupiter.params.provider.Arguments
2731
import org.junit.jupiter.params.provider.MethodSource
2832
import org.junit.jupiter.params.provider.ValueSource
2933

34+
/**
35+
* Verify that [AbstractStreamOperation] behaves correctly, given various initial states. We
36+
* intentionally mock the [DestinationInitialStatus]. This allows us to verify that the stream ops
37+
* only looks at specific fields - the mocked initial statuses will throw exceptions for unstubbed
38+
* methods.
39+
*
40+
* For example, we don't need to write separate test cases for "final table does not exist and
41+
* destination state has softReset=true/false" - instead we have a single test case for "final table
42+
* does not exist", and it doesn't stub the `needsSoftReset` method. If we introduce a bug in stream
43+
* ops and it starts checking needsSoftReset even though the final table doesn't exist, then these
44+
* tests will start failing.
45+
*/
3046
class AbstractStreamOperationTest {
3147
class TestStreamOperation(
3248
storageOperations: StorageOperations,
3349
destinationInitialStatus: DestinationInitialStatus<MinimumDestinationState.Impl>
34-
) : AbstractStreamOperation<MinimumDestinationState.Impl>(
35-
storageOperations,
36-
destinationInitialStatus,
37-
) {
50+
) :
51+
AbstractStreamOperation<MinimumDestinationState.Impl>(
52+
storageOperations,
53+
destinationInitialStatus,
54+
) {
3855
override fun writeRecords(
3956
streamConfig: StreamConfig,
4057
stream: Stream<PartialAirbyteMessage>
@@ -49,18 +66,20 @@ class AbstractStreamOperationTest {
4966

5067
@Nested
5168
inner class Overwrite {
52-
private val streamConfig = StreamConfig(
53-
streamId,
54-
DestinationSyncMode.OVERWRITE,
55-
listOf(),
56-
Optional.empty(),
57-
columns,
58-
// TODO currently these values are unused. Eventually we should restructure this class
59-
// to test based on generation ID instead of sync mode.
60-
0,
61-
0,
62-
0
63-
)
69+
private val streamConfig =
70+
StreamConfig(
71+
streamId,
72+
DestinationSyncMode.OVERWRITE,
73+
listOf(),
74+
Optional.empty(),
75+
columns,
76+
// TODO currently these values are unused. Eventually we should restructure this
77+
// class
78+
// to test based on generation ID instead of sync mode.
79+
0,
80+
0,
81+
0
82+
)
6483

6584
@Test
6685
fun emptyDestination() {
@@ -204,7 +223,8 @@ class AbstractStreamOperationTest {
204223
every { initialState.initialRawTableStatus } returns mockk<InitialRawTableStatus>()
205224
// This is an overwrite sync, so we can ignore the old raw records.
206225
// We should skip T+D if the current sync emitted 0 records.
207-
every { initialState.initialRawTableStatus.hasUnprocessedRecords } returns hasUnprocessedRecords
226+
every { initialState.initialRawTableStatus.hasUnprocessedRecords } returns
227+
hasUnprocessedRecords
208228
every { initialState.isFinalTablePresent } returns true
209229
every { initialState.isFinalTableEmpty } returns false
210230

@@ -232,11 +252,14 @@ class AbstractStreamOperationTest {
232252
@Nested
233253
inner class NonOverwrite {
234254
@ParameterizedTest
235-
@MethodSource("io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs")
255+
@MethodSource(
256+
"io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs"
257+
)
236258
fun emptyDestination(streamConfig: StreamConfig) {
237259
val initialState = mockk<DestinationInitialStatus<MinimumDestinationState.Impl>>()
238260
every { initialState.streamConfig } returns streamConfig
239-
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns Optional.empty()
261+
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns
262+
Optional.empty()
240263
every { initialState.isFinalTablePresent } returns false
241264

242265
val streamOperations = TestStreamOperation(storageOperations, initialState)
@@ -264,11 +287,14 @@ class AbstractStreamOperationTest {
264287
}
265288

266289
@ParameterizedTest
267-
@MethodSource("io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs")
290+
@MethodSource(
291+
"io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs"
292+
)
268293
fun existingTableSchemaMismatch(streamConfig: StreamConfig) {
269294
val initialState = mockk<DestinationInitialStatus<MinimumDestinationState.Impl>>()
270295
every { initialState.streamConfig } returns streamConfig
271-
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns Optional.empty()
296+
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns
297+
Optional.empty()
272298
every { initialState.isFinalTablePresent } returns true
273299
every { initialState.isSchemaMismatch } returns true
274300

@@ -297,11 +323,14 @@ class AbstractStreamOperationTest {
297323
}
298324

299325
@ParameterizedTest
300-
@MethodSource("io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs")
326+
@MethodSource(
327+
"io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs"
328+
)
301329
fun existingTableSchemaMatch(streamConfig: StreamConfig) {
302330
val initialState = mockk<DestinationInitialStatus<MinimumDestinationState.Impl>>()
303331
every { initialState.streamConfig } returns streamConfig
304-
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns Optional.empty()
332+
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns
333+
Optional.empty()
305334
every { initialState.isFinalTablePresent } returns true
306335
every { initialState.isSchemaMismatch } returns false
307336
every { initialState.destinationState } returns MinimumDestinationState.Impl(false)
@@ -331,18 +360,63 @@ class AbstractStreamOperationTest {
331360
}
332361

333362
@ParameterizedTest
334-
@MethodSource("io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigsAndBoolean")
335-
fun existingNonEmptyTableNoNewRecords(streamConfig: StreamConfig, hasUnprocessedRecords: Boolean) {
363+
@MethodSource(
364+
"io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigs"
365+
)
366+
fun existingTableAndStateRequiresSoftReset(streamConfig: StreamConfig) {
367+
val initialState = mockk<DestinationInitialStatus<MinimumDestinationState.Impl>>()
368+
every { initialState.streamConfig } returns streamConfig
369+
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns
370+
Optional.empty()
371+
every { initialState.isFinalTablePresent } returns true
372+
every { initialState.isSchemaMismatch } returns false
373+
every { initialState.destinationState } returns MinimumDestinationState.Impl(true)
374+
375+
val streamOperations = TestStreamOperation(storageOperations, initialState)
376+
377+
verifySequence {
378+
storageOperations.prepareStage(streamId, streamConfig.destinationSyncMode)
379+
storageOperations.createFinalSchema(streamId)
380+
storageOperations.softResetFinalTable(streamConfig)
381+
}
382+
confirmVerified(storageOperations)
383+
384+
clearMocks(storageOperations)
385+
streamOperations.finalizeTable(streamConfig, StreamSyncSummary(Optional.of(42)))
386+
387+
verifySequence {
388+
storageOperations.cleanupStage(streamId)
389+
storageOperations.typeAndDedupe(
390+
streamConfig,
391+
Optional.empty(),
392+
"",
393+
)
394+
}
395+
confirmVerified(storageOperations)
396+
checkUnnecessaryStub(initialState, initialState.initialRawTableStatus)
397+
}
398+
399+
@ParameterizedTest
400+
@MethodSource(
401+
"io.airbyte.integrations.base.destination.operation.AbstractStreamOperationTest#nonOverwriteStreamConfigsAndBoolean"
402+
)
403+
fun existingNonEmptyTableNoNewRecords(
404+
streamConfig: StreamConfig,
405+
hasUnprocessedRecords: Boolean
406+
) {
336407
val initialState = mockk<DestinationInitialStatus<MinimumDestinationState.Impl>>()
337408
every { initialState.streamConfig } returns streamConfig
338409
// This is an overwrite sync, so we can ignore the old raw records.
339410
// We should skip T+D if the current sync emitted 0 records.
340-
every { initialState.initialRawTableStatus.hasUnprocessedRecords } returns hasUnprocessedRecords
411+
every { initialState.initialRawTableStatus.hasUnprocessedRecords } returns
412+
hasUnprocessedRecords
341413
if (hasUnprocessedRecords) {
342414
// We only care about this value if we're executing T+D.
343-
// If there are no unprocessed records from a previous sync, and no new records from this sync,
415+
// If there are no unprocessed records from a previous sync, and no new records from
416+
// this sync,
344417
// we don't need to set it.
345-
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns maxProcessedTimestamp
418+
every { initialState.initialRawTableStatus.maxProcessedTimestamp } returns
419+
maxProcessedTimestamp
346420
}
347421
every { initialState.isFinalTablePresent } returns true
348422
every { initialState.isSchemaMismatch } returns false
@@ -361,7 +435,8 @@ class AbstractStreamOperationTest {
361435

362436
verifySequence {
363437
storageOperations.cleanupStage(streamId)
364-
// If this sync emitted no records, we only need to run T+D if a previous sync emitted
438+
// If this sync emitted no records, we only need to run T+D if a previous sync
439+
// emitted
365440
// some records but failed to run T+D.
366441
if (hasUnprocessedRecords) {
367442
storageOperations.typeAndDedupe(streamConfig, maxProcessedTimestamp, "")
@@ -373,73 +448,81 @@ class AbstractStreamOperationTest {
373448
}
374449

375450
companion object {
376-
val streamId = StreamId(
377-
"final_namespace",
378-
"final_name",
379-
"raw_namespace",
380-
"raw_name",
381-
"original_namespace",
382-
"original_name",
383-
)
451+
val streamId =
452+
StreamId(
453+
"final_namespace",
454+
"final_name",
455+
"raw_namespace",
456+
"raw_name",
457+
"original_namespace",
458+
"original_name",
459+
)
384460
private val pk1 = ColumnId("pk1", "pk1_original_name", "pk1_canonical_name")
385461
private val pk2 = ColumnId("pk2", "pk2_original_name", "pk2_canonical_name")
386462
private val cursor = ColumnId("cursor", "cursor_original_name", "cursor_canonical_name")
387-
val columns: LinkedHashMap<ColumnId, AirbyteType> = linkedMapOf(
388-
pk1 to AirbyteProtocolType.INTEGER,
389-
pk2 to AirbyteProtocolType.STRING,
390-
cursor to AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE,
391-
ColumnId(
392-
"username",
393-
"username_original_name",
394-
"username_canonical_name",
395-
) to AirbyteProtocolType.STRING,
396-
)
463+
val columns: LinkedHashMap<ColumnId, AirbyteType> =
464+
linkedMapOf(
465+
pk1 to AirbyteProtocolType.INTEGER,
466+
pk2 to AirbyteProtocolType.STRING,
467+
cursor to AirbyteProtocolType.TIMESTAMP_WITH_TIMEZONE,
468+
ColumnId(
469+
"username",
470+
"username_original_name",
471+
"username_canonical_name",
472+
) to AirbyteProtocolType.STRING,
473+
)
397474

398475
const val EXPECTED_OVERWRITE_SUFFIX = "_airbyte_tmp"
399476
val maxProcessedTimestamp = Optional.of(Instant.parse("2024-01-23T12:34:56Z"))
400477

401-
private val appendStreamConfig = StreamConfig(
402-
streamId,
403-
DestinationSyncMode.APPEND,
404-
listOf(),
405-
Optional.empty(),
406-
columns,
407-
// TODO currently these values are unused. Eventually we should restructure this class
408-
// to test based on generation ID instead of sync mode.
409-
0,
410-
0,
411-
0
412-
)
413-
private val dedupStreamConfig = StreamConfig(
414-
streamId,
415-
DestinationSyncMode.APPEND_DEDUP,
416-
listOf(pk1, pk2),
417-
Optional.of(cursor),
418-
columns,
419-
// TODO currently these values are unused. Eventually we should restructure this class
420-
// to test based on generation ID instead of sync mode.
421-
0,
422-
0,
423-
0
424-
)
478+
private val appendStreamConfig =
479+
StreamConfig(
480+
streamId,
481+
DestinationSyncMode.APPEND,
482+
listOf(),
483+
Optional.empty(),
484+
columns,
485+
// TODO currently these values are unused. Eventually we should restructure this
486+
// class
487+
// to test based on generation ID instead of sync mode.
488+
0,
489+
0,
490+
0
491+
)
492+
private val dedupStreamConfig =
493+
StreamConfig(
494+
streamId,
495+
DestinationSyncMode.APPEND_DEDUP,
496+
listOf(pk1, pk2),
497+
Optional.of(cursor),
498+
columns,
499+
// TODO currently these values are unused. Eventually we should restructure this
500+
// class
501+
// to test based on generation ID instead of sync mode.
502+
0,
503+
0,
504+
0
505+
)
425506

426507
// junit 5 doesn't support class-level parameterization...
427508
// so we have to hack this in a somewhat dumb way.
428509
// append and dedup should behave identically from StreamOperations' POV,
429510
// so just shove them together.
430511
@JvmStatic
431-
fun nonOverwriteStreamConfigs(): Stream<Arguments> = Stream.of(
432-
Arguments.of(appendStreamConfig),
433-
Arguments.of(dedupStreamConfig),
434-
)
512+
fun nonOverwriteStreamConfigs(): Stream<Arguments> =
513+
Stream.of(
514+
Arguments.of(appendStreamConfig),
515+
Arguments.of(dedupStreamConfig),
516+
)
435517

436518
// Some tests are further parameterized, which this method supports.
437519
@JvmStatic
438-
fun nonOverwriteStreamConfigsAndBoolean(): Stream<Arguments> = Stream.of(
439-
Arguments.of(appendStreamConfig, true),
440-
Arguments.of(appendStreamConfig, false),
441-
Arguments.of(dedupStreamConfig, true),
442-
Arguments.of(dedupStreamConfig, false),
443-
)
520+
fun nonOverwriteStreamConfigsAndBoolean(): Stream<Arguments> =
521+
Stream.of(
522+
Arguments.of(appendStreamConfig, true),
523+
Arguments.of(appendStreamConfig, false),
524+
Arguments.of(dedupStreamConfig, true),
525+
Arguments.of(dedupStreamConfig, false),
526+
)
444527
}
445528
}

0 commit comments

Comments
 (0)