Skip to content

Commit 6d5ecca

Browse files
authored
Destination CDK: Simplify AsyncStreamConsumer constructors (#37106)
1 parent faad484 commit 6d5ecca

File tree

8 files changed

+62
-146
lines changed

8 files changed

+62
-146
lines changed

airbyte-cdk/java/airbyte-cdk/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ Maven and Gradle will automatically reference the correct (pinned) version of th
144144

145145
| Version | Date | Pull Request | Subject |
146146
|:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
147+
| 0.30.3 | 2024-04-12 | [\#37106](https://github.com/airbytehq/airbyte/pull/37106) | Destinations: Simplify constructors in `AsyncStreamConsumer` |
147148
| 0.30.2 | 2024-04-12 | [\#36926](https://github.com/airbytehq/airbyte/pull/36926) | Destinations: Remove `JdbcSqlOperations#formatData`; misc changes for java interop |
148149
| 0.30.1 | 2024-04-11 | [\#36919](https://github.com/airbytehq/airbyte/pull/36919) | Fix regression in sources conversion of null values |
149150
| 0.30.0 | 2024-04-11 | [\#36974](https://github.com/airbytehq/airbyte/pull/36974) | Destinations: Pass config to jdbc sqlgenerator; allow cascade drop |

airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumer.kt

+22-94
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@
44

55
package io.airbyte.cdk.integrations.destination.async
66

7-
import com.google.common.annotations.VisibleForTesting
87
import com.google.common.base.Preconditions
98
import com.google.common.base.Strings
109
import io.airbyte.cdk.integrations.base.SerializedAirbyteMessageConsumer
1110
import io.airbyte.cdk.integrations.destination.StreamSyncSummary
1211
import io.airbyte.cdk.integrations.destination.async.buffers.BufferEnqueue
1312
import io.airbyte.cdk.integrations.destination.async.buffers.BufferManager
14-
import io.airbyte.cdk.integrations.destination.async.deser.DeserializationUtil
15-
import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer
16-
import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer
13+
import io.airbyte.cdk.integrations.destination.async.deser.AirbyteMessageDeserializer
1714
import io.airbyte.cdk.integrations.destination.async.function.DestinationFlushFunction
1815
import io.airbyte.cdk.integrations.destination.async.model.PartialAirbyteMessage
1916
import io.airbyte.cdk.integrations.destination.async.state.FlushFailure
@@ -44,26 +41,23 @@ private val logger = KotlinLogging.logger {}
4441
* memory limit governed by [GlobalMemoryManager]. Record writing is decoupled via [FlushWorkers].
4542
* See the other linked class for more detail.
4643
*/
47-
class AsyncStreamConsumer
48-
@VisibleForTesting
49-
constructor(
44+
class AsyncStreamConsumer(
5045
outputRecordCollector: Consumer<AirbyteMessage>,
5146
private val onStart: OnStartFunction,
5247
private val onClose: OnCloseFunction,
53-
flusher: DestinationFlushFunction,
48+
onFlush: DestinationFlushFunction,
5449
private val catalog: ConfiguredAirbyteCatalog,
5550
private val bufferManager: BufferManager,
56-
private val flushFailure: FlushFailure,
5751
private val defaultNamespace: Optional<String>,
58-
workerPool: ExecutorService,
59-
private val dataTransformer: StreamAwareDataTransformer,
60-
private val deserializationUtil: DeserializationUtil,
52+
private val flushFailure: FlushFailure = FlushFailure(),
53+
workerPool: ExecutorService = Executors.newFixedThreadPool(5),
54+
private val airbyteMessageDeserializer: AirbyteMessageDeserializer,
6155
) : SerializedAirbyteMessageConsumer {
6256
private val bufferEnqueue: BufferEnqueue = bufferManager.bufferEnqueue
6357
private val flushWorkers: FlushWorkers =
6458
FlushWorkers(
6559
bufferManager.bufferDequeue,
66-
flusher,
60+
onFlush,
6761
outputRecordCollector,
6862
flushFailure,
6963
bufferManager.stateManager,
@@ -81,73 +75,7 @@ constructor(
8175
private var hasClosed = false
8276
private var hasFailed = false
8377

84-
constructor(
85-
outputRecordCollector: Consumer<AirbyteMessage>,
86-
onStart: OnStartFunction,
87-
onClose: OnCloseFunction,
88-
flusher: DestinationFlushFunction,
89-
catalog: ConfiguredAirbyteCatalog,
90-
bufferManager: BufferManager,
91-
defaultNamespace: Optional<String>,
92-
) : this(
93-
outputRecordCollector,
94-
onStart,
95-
onClose,
96-
flusher,
97-
catalog,
98-
bufferManager,
99-
FlushFailure(),
100-
defaultNamespace,
101-
)
102-
103-
constructor(
104-
outputRecordCollector: Consumer<AirbyteMessage>,
105-
onStart: OnStartFunction,
106-
onClose: OnCloseFunction,
107-
flusher: DestinationFlushFunction,
108-
catalog: ConfiguredAirbyteCatalog,
109-
bufferManager: BufferManager,
110-
defaultNamespace: Optional<String>,
111-
dataTransformer: StreamAwareDataTransformer,
112-
) : this(
113-
outputRecordCollector,
114-
onStart,
115-
onClose,
116-
flusher,
117-
catalog,
118-
bufferManager,
119-
FlushFailure(),
120-
defaultNamespace,
121-
Executors.newFixedThreadPool(5),
122-
dataTransformer,
123-
DeserializationUtil(),
124-
)
125-
126-
constructor(
127-
outputRecordCollector: Consumer<AirbyteMessage>,
128-
onStart: OnStartFunction,
129-
onClose: OnCloseFunction,
130-
flusher: DestinationFlushFunction,
131-
catalog: ConfiguredAirbyteCatalog,
132-
bufferManager: BufferManager,
133-
defaultNamespace: Optional<String>,
134-
workerPool: ExecutorService,
135-
) : this(
136-
outputRecordCollector,
137-
onStart,
138-
onClose,
139-
flusher,
140-
catalog,
141-
bufferManager,
142-
FlushFailure(),
143-
defaultNamespace,
144-
workerPool,
145-
IdentityDataTransformer(),
146-
DeserializationUtil(),
147-
)
148-
149-
@VisibleForTesting
150-
constructor(
78+
internal constructor(
15179
outputRecordCollector: Consumer<AirbyteMessage>,
15280
onStart: OnStartFunction,
15381
onClose: OnCloseFunction,
@@ -163,11 +91,10 @@ constructor(
16391
flusher,
16492
catalog,
16593
bufferManager,
166-
flushFailure,
16794
defaultNamespace,
95+
flushFailure,
16896
Executors.newFixedThreadPool(5),
169-
IdentityDataTransformer(),
170-
DeserializationUtil(),
97+
AirbyteMessageDeserializer(),
17198
)
17299

173100
@Throws(Exception::class)
@@ -183,7 +110,7 @@ constructor(
183110

184111
@Throws(Exception::class)
185112
override fun accept(
186-
messageString: String,
113+
message: String,
187114
sizeInBytes: Int,
188115
) {
189116
Preconditions.checkState(hasStarted, "Cannot accept records until consumer has started")
@@ -193,21 +120,22 @@ constructor(
193120
* to try to use a thread pool to partially deserialize to get record type and stream name, we can
194121
* do it without touching buffer manager.
195122
*/
196-
val message =
197-
deserializationUtil.deserializeAirbyteMessage(
198-
messageString,
199-
dataTransformer,
123+
val partialAirbyteMessage =
124+
airbyteMessageDeserializer.deserializeAirbyteMessage(
125+
message,
200126
)
201-
if (AirbyteMessage.Type.RECORD == message.type) {
202-
if (Strings.isNullOrEmpty(message.record?.namespace)) {
203-
message.record?.namespace = defaultNamespace.getOrNull()
127+
if (AirbyteMessage.Type.RECORD == partialAirbyteMessage.type) {
128+
if (Strings.isNullOrEmpty(partialAirbyteMessage.record?.namespace)) {
129+
partialAirbyteMessage.record?.namespace = defaultNamespace.getOrNull()
204130
}
205-
validateRecord(message)
131+
validateRecord(partialAirbyteMessage)
206132

207-
message.record?.streamDescriptor?.let { getRecordCounter(it).incrementAndGet() }
133+
partialAirbyteMessage.record?.streamDescriptor?.let {
134+
getRecordCounter(it).incrementAndGet()
135+
}
208136
}
209137
bufferEnqueue.addRecord(
210-
message,
138+
partialAirbyteMessage,
211139
sizeInBytes + PARTIAL_DESERIALIZE_REF_BYTES,
212140
defaultNamespace,
213141
)

airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/deser/DeserializationUtil.kt airbyte-cdk/java/airbyte-cdk/core/src/main/kotlin/io/airbyte/cdk/integrations/destination/async/deser/AirbyteMessageDeserializer.kt

+9-7
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@ import io.airbyte.cdk.integrations.destination.async.model.PartialAirbyteMessage
77
import io.airbyte.commons.json.Jsons
88
import io.airbyte.protocol.models.v0.AirbyteMessage
99

10-
class DeserializationUtil {
10+
class AirbyteMessageDeserializer(
11+
private val dataTransformer: StreamAwareDataTransformer = IdentityDataTransformer(),
12+
) {
1113
/**
1214
* Deserializes to a [PartialAirbyteMessage] which can represent either a Record or a State
1315
* Message
@@ -16,20 +18,20 @@ class DeserializationUtil {
1618
* * entire serialized message string when message is a valid State Message
1719
* * serialized AirbyteRecordMessage when message is a valid Record Message
1820
*
19-
* @param messageString the string to deserialize
21+
* @param message the string to deserialize
2022
* @return PartialAirbyteMessage if the message is valid; a RuntimeException is thrown if it cannot be deserialized or has an unsupported type
2123
*/
2224
fun deserializeAirbyteMessage(
23-
messageString: String?,
24-
dataTransformer: StreamAwareDataTransformer,
25+
message: String?,
2526
): PartialAirbyteMessage {
2627
// TODO: This makes some sketchy assumptions by deserializing either the whole or the
2728
// partial based on type.
2829
// Use JsonSubTypes and extend StdDeserializer to properly handle this.
2930
// Make immutability a first class citizen in the PartialAirbyteMessage class.
3031
val partial =
31-
Jsons.tryDeserializeExact(messageString, PartialAirbyteMessage::class.java)
32-
.orElseThrow { RuntimeException("Unable to deserialize PartialAirbyteMessage.") }
32+
Jsons.tryDeserializeExact(message, PartialAirbyteMessage::class.java).orElseThrow {
33+
RuntimeException("Unable to deserialize PartialAirbyteMessage.")
34+
}
3335

3436
val msgType = partial.type
3537
if (AirbyteMessage.Type.RECORD == msgType && partial.record?.data != null) {
@@ -50,7 +52,7 @@ class DeserializationUtil {
5052
// usage.
5153
partial.record?.data = null
5254
} else if (AirbyteMessage.Type.STATE == msgType) {
53-
partial.withSerialized(messageString)
55+
partial.withSerialized(message)
5456
} else {
5557
throw RuntimeException(String.format("Unsupported message type: %s", msgType))
5658
}
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.30.2
1+
version=0.30.3

airbyte-cdk/java/airbyte-cdk/core/src/test/kotlin/io/airbyte/cdk/integrations/destination/async/AsyncStreamConsumerTest.kt

+11-20
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,7 @@ package io.airbyte.cdk.integrations.destination.async
77
import com.fasterxml.jackson.databind.JsonNode
88
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings
99
import io.airbyte.cdk.integrations.destination.async.buffers.BufferManager
10-
import io.airbyte.cdk.integrations.destination.async.deser.DeserializationUtil
11-
import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer
10+
import io.airbyte.cdk.integrations.destination.async.deser.AirbyteMessageDeserializer
1211
import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer
1312
import io.airbyte.cdk.integrations.destination.async.function.DestinationFlushFunction
1413
import io.airbyte.cdk.integrations.destination.async.model.PartialAirbyteMessage
@@ -125,7 +124,7 @@ class AsyncStreamConsumerTest {
125124
private lateinit var outputRecordCollector: Consumer<AirbyteMessage>
126125
private lateinit var flushFailure: FlushFailure
127126
private lateinit var streamAwareDataTransformer: StreamAwareDataTransformer
128-
private lateinit var deserializationUtil: DeserializationUtil
127+
private lateinit var airbyteMessageDeserializer: AirbyteMessageDeserializer
129128

130129
@BeforeEach
131130
@Suppress("UNCHECKED_CAST")
@@ -139,20 +138,18 @@ class AsyncStreamConsumerTest {
139138
flushFunction = Mockito.mock(DestinationFlushFunction::class.java)
140139
outputRecordCollector = Mockito.mock(Consumer::class.java) as Consumer<AirbyteMessage>
141140
flushFailure = Mockito.mock(FlushFailure::class.java)
142-
deserializationUtil = DeserializationUtil()
143-
streamAwareDataTransformer = IdentityDataTransformer()
141+
airbyteMessageDeserializer = AirbyteMessageDeserializer()
144142
consumer =
145143
AsyncStreamConsumer(
146144
outputRecordCollector = outputRecordCollector,
147145
onStart = onStart,
148146
onClose = onClose,
149-
flusher = flushFunction,
147+
onFlush = flushFunction,
150148
catalog = CATALOG,
151149
bufferManager = BufferManager(),
152150
flushFailure = flushFailure,
153151
defaultNamespace = Optional.of("default_ns"),
154-
dataTransformer = streamAwareDataTransformer,
155-
deserializationUtil = deserializationUtil,
152+
airbyteMessageDeserializer = airbyteMessageDeserializer,
156153
workerPool = Executors.newFixedThreadPool(5),
157154
)
158155

@@ -330,9 +327,8 @@ class AsyncStreamConsumerTest {
330327
val serializedAirbyteMessage = Jsons.serialize(airbyteMessage)
331328
val airbyteRecordString = Jsons.serialize(PAYLOAD)
332329
val partial =
333-
deserializationUtil.deserializeAirbyteMessage(
330+
airbyteMessageDeserializer.deserializeAirbyteMessage(
334331
serializedAirbyteMessage,
335-
streamAwareDataTransformer,
336332
)
337333
assertEquals(airbyteRecordString, partial.serialized)
338334
}
@@ -357,9 +353,8 @@ class AsyncStreamConsumerTest {
357353
val serializedAirbyteMessage = Jsons.serialize(airbyteMessage)
358354
val airbyteRecordString = Jsons.serialize(payload)
359355
val partial =
360-
deserializationUtil.deserializeAirbyteMessage(
356+
airbyteMessageDeserializer.deserializeAirbyteMessage(
361357
serializedAirbyteMessage,
362-
streamAwareDataTransformer,
363358
)
364359
assertEquals(airbyteRecordString, partial.serialized)
365360
}
@@ -378,9 +373,8 @@ class AsyncStreamConsumerTest {
378373
)
379374
val serializedAirbyteMessage = Jsons.serialize(airbyteMessage)
380375
val partial =
381-
deserializationUtil.deserializeAirbyteMessage(
376+
airbyteMessageDeserializer.deserializeAirbyteMessage(
382377
serializedAirbyteMessage,
383-
streamAwareDataTransformer,
384378
)
385379
assertEquals(emptyMap.toString(), partial.serialized)
386380
}
@@ -393,9 +387,8 @@ class AsyncStreamConsumerTest {
393387
assertThrows(
394388
RuntimeException::class.java,
395389
) {
396-
deserializationUtil.deserializeAirbyteMessage(
390+
airbyteMessageDeserializer.deserializeAirbyteMessage(
397391
serializedAirbyteMessage,
398-
streamAwareDataTransformer,
399392
)
400393
}
401394
}
@@ -404,9 +397,8 @@ class AsyncStreamConsumerTest {
404397
internal fun deserializeAirbyteMessageWithAirbyteState() {
405398
val serializedAirbyteMessage = Jsons.serialize(STATE_MESSAGE1)
406399
val partial =
407-
deserializationUtil.deserializeAirbyteMessage(
400+
airbyteMessageDeserializer.deserializeAirbyteMessage(
408401
serializedAirbyteMessage,
409-
streamAwareDataTransformer,
410402
)
411403
assertEquals(serializedAirbyteMessage, partial.serialized)
412404
}
@@ -430,9 +422,8 @@ class AsyncStreamConsumerTest {
430422
assertThrows(
431423
RuntimeException::class.java,
432424
) {
433-
deserializationUtil.deserializeAirbyteMessage(
425+
airbyteMessageDeserializer.deserializeAirbyteMessage(
434426
serializedAirbyteMessage,
435-
streamAwareDataTransformer,
436427
)
437428
}
438429
}

airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/JdbcBufferedConsumerFactory.kt

+3-4
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ import io.airbyte.cdk.integrations.destination.NamingConventionTransformer
1515
import io.airbyte.cdk.integrations.destination.StreamSyncSummary
1616
import io.airbyte.cdk.integrations.destination.async.AsyncStreamConsumer
1717
import io.airbyte.cdk.integrations.destination.async.buffers.BufferManager
18-
import io.airbyte.cdk.integrations.destination.async.deser.DeserializationUtil
18+
import io.airbyte.cdk.integrations.destination.async.deser.AirbyteMessageDeserializer
1919
import io.airbyte.cdk.integrations.destination.async.deser.IdentityDataTransformer
2020
import io.airbyte.cdk.integrations.destination.async.deser.StreamAwareDataTransformer
2121
import io.airbyte.cdk.integrations.destination.async.model.PartialAirbyteMessage
@@ -78,11 +78,10 @@ object JdbcBufferedConsumerFactory {
7878
),
7979
catalog,
8080
BufferManager((Runtime.getRuntime().maxMemory() * 0.2).toLong()),
81-
FlushFailure(),
8281
Optional.ofNullable(defaultNamespace),
82+
FlushFailure(),
8383
Executors.newFixedThreadPool(2),
84-
dataTransformer,
85-
DeserializationUtil()
84+
AirbyteMessageDeserializer(dataTransformer)
8685
)
8786
}
8887

airbyte-cdk/java/airbyte-cdk/s3-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/staging/AsyncFlush.kt

+1-3
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ import org.apache.commons.io.FileUtils
2525
private val logger = KotlinLogging.logger {}
2626

2727
internal class AsyncFlush(
28-
streamDescToWriteConfig: Map<StreamDescriptor, WriteConfig>,
28+
private val streamDescToWriteConfig: Map<StreamDescriptor, WriteConfig>,
2929
private val stagingOperations: StagingOperations?,
3030
private val database: JdbcDatabase?,
3131
private val catalog: ConfiguredAirbyteCatalog?,
@@ -41,8 +41,6 @@ internal class AsyncFlush(
4141
override val optimalBatchSizeBytes: Long,
4242
private val useDestinationsV2Columns: Boolean
4343
) : DestinationFlushFunction {
44-
private val streamDescToWriteConfig: Map<StreamDescriptor, WriteConfig> =
45-
streamDescToWriteConfig
4644

4745
@Throws(Exception::class)
4846
override fun flush(decs: StreamDescriptor, stream: Stream<PartialAirbyteMessage>) {

0 commit comments

Comments
 (0)