Skip to content

Commit 1f34b9c

Browse files
committed
add type option to cdk common code
1 parent 1e9ee1d commit 1f34b9c

File tree

3 files changed

+76
-122
lines changed

3 files changed

+76
-122
lines changed

airbyte-cdk/java/airbyte-cdk/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ Maven and Gradle will automatically reference the correct (pinned) version of th
144144

145145
| Version | Date | Pull Request | Subject |
146146
|:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
147+
| 0.30.2 | 2024-04-12 | [\#37006](https://github.com/airbytehq/airbyte/pull/37006) | Destinations - add airbyte meta column to `SqlOperationsUtils` |
147148
| 0.30.1 | 2024-04-11 | [\#36919](https://github.com/airbytehq/airbyte/pull/36919) | Fix regression in sources conversion of null values |
148149
| 0.30.0 | 2024-04-11 | [\#36974](https://github.com/airbytehq/airbyte/pull/36974) | Destinations: Pass config to jdbc sqlgenerator; allow cascade drop |
149150
| 0.29.13 | 2024-04-10 | [\#36981](https://github.com/airbytehq/airbyte/pull/36981) | DB sources : Emit analytics for data type serialization errors. |
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.30.1
1+
version=0.30.2

airbyte-cdk/java/airbyte-cdk/db-destinations/src/main/kotlin/io/airbyte/cdk/integrations/destination/jdbc/SqlOperationsUtils.kt

Lines changed: 74 additions & 121 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,11 @@
33
*/
44
package io.airbyte.cdk.integrations.destination.jdbc
55

6-
import com.google.common.annotations.VisibleForTesting
6+
import com.fasterxml.jackson.databind.ObjectMapper
77
import com.google.common.collect.Iterables
88
import io.airbyte.cdk.db.jdbc.JdbcDatabase
99
import io.airbyte.cdk.integrations.base.TypingAndDedupingFlag.isDestinationV2
1010
import io.airbyte.cdk.integrations.destination.async.model.PartialAirbyteMessage
11-
import io.airbyte.commons.functional.CheckedConsumer
1211
import java.sql.Connection
1312
import java.sql.SQLException
1413
import java.sql.Timestamp
@@ -17,136 +16,90 @@ import java.util.*
1716
import java.util.function.Consumer
1817
import java.util.function.Supplier
1918

20-
object SqlOperationsUtils {
21-
/**
22-
* Inserts "raw" records in a single query. The purpose of helper to abstract away
23-
* database-specific SQL syntax from this query.
24-
*
25-
* @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id,
26-
* data, emitted_at)
27-
* @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb ?)
28-
* @param jdbcDatabase jdbc database
29-
* @param records records to write
30-
* @throws SQLException exception
31-
*/
32-
@JvmStatic
33-
@Throws(SQLException::class)
34-
fun insertRawRecordsInSingleQuery(
35-
insertQueryComponent: String?,
36-
recordQueryComponent: String?,
37-
jdbcDatabase: JdbcDatabase,
38-
records: List<PartialAirbyteMessage>
39-
) {
40-
insertRawRecordsInSingleQuery(
41-
insertQueryComponent,
42-
recordQueryComponent,
43-
jdbcDatabase,
44-
records,
45-
{ UUID.randomUUID() },
46-
true
47-
)
19+
/**
20+
* Inserts "raw" records in a single query. The purpose of this helper is to abstract away database-specific
21+
* SQL syntax from this query.
22+
*
23+
* @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id,
24+
* data, emitted_at)
25+
* @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb, ?)
26+
* @param jdbcDatabase jdbc database
27+
* @param records records to write
28+
* @throws SQLException exception
29+
*/
30+
@Throws(SQLException::class)
31+
fun insertRawRecordsInSingleQuery(
32+
insertQueryComponent: String?,
33+
recordQueryComponent: String?,
34+
jdbcDatabase: JdbcDatabase,
35+
records: List<PartialAirbyteMessage>,
36+
uuidSupplier: Supplier<UUID> = Supplier { UUID.randomUUID() },
37+
sem: Boolean = true,
38+
partitionSize: Int = 10000,
39+
nullSqlWideType: Int = java.sql.Types.VARCHAR
40+
) {
41+
if (records.isEmpty()) {
42+
return
4843
}
44+
val objectMapper = ObjectMapper()
45+
jdbcDatabase.execute { connection: Connection ->
4946

50-
/**
51-
* Inserts "raw" records in a single query. The purpose of helper to abstract away
52-
* database-specific SQL syntax from this query.
53-
*
54-
* This version does not add a semicolon at the end of the INSERT statement.
55-
*
56-
* @param insertQueryComponent the first line of the query e.g. INSERT INTO public.users (ab_id,
57-
* data, emitted_at)
58-
* @param recordQueryComponent query template for a full record e.g. (?, ?::jsonb ?)
59-
* @param jdbcDatabase jdbc database
60-
* @param records records to write
61-
* @throws SQLException exception
62-
*/
63-
@Throws(SQLException::class)
64-
fun insertRawRecordsInSingleQueryNoSem(
65-
insertQueryComponent: String?,
66-
recordQueryComponent: String?,
67-
jdbcDatabase: JdbcDatabase,
68-
records: List<PartialAirbyteMessage>
69-
) {
70-
insertRawRecordsInSingleQuery(
71-
insertQueryComponent,
72-
recordQueryComponent,
73-
jdbcDatabase,
74-
records,
75-
{ UUID.randomUUID() },
76-
false
77-
)
78-
}
47+
// Strategy: We want to use PreparedStatement because it handles binding values to
48+
// the SQL query
49+
// (e.g. handling formatting timestamps). A PreparedStatement is created
50+
// by supplying the
51+
// full SQL string at creation time. Then subsequently specifying which values are
52+
// bound to the
53+
// string. Thus there will be two loops below.
54+
// 1) Loop over records to build the full string.
55+
// 2) Loop over the records and bind the appropriate values to the string.
56+
// We also partition the query to run on partitionSize records (10k by default) at a time, since some DBs set a
57+
// max limit on
58+
// how many records can be inserted at once
59+
// TODO(sherif) this should use a smarter, destination-aware partitioning scheme
60+
// instead of 10k by
61+
// default
62+
for (partition in Iterables.partition(records, partitionSize)) {
63+
val sql = StringBuilder(insertQueryComponent)
64+
partition.forEach(
65+
Consumer { r: PartialAirbyteMessage? -> sql.append(recordQueryComponent) }
66+
)
67+
val s = sql.toString()
68+
val s1 = s.substring(0, s.length - 2) + (if (sem) ";" else "")
7969

80-
@VisibleForTesting
81-
@Throws(SQLException::class)
82-
fun insertRawRecordsInSingleQuery(
83-
insertQueryComponent: String?,
84-
recordQueryComponent: String?,
85-
jdbcDatabase: JdbcDatabase,
86-
records: List<PartialAirbyteMessage>,
87-
uuidSupplier: Supplier<UUID>,
88-
sem: Boolean
89-
) {
90-
if (records.isEmpty()) {
91-
return
92-
}
70+
connection.prepareStatement(s1).use { statement ->
71+
// second loop: bind values to the SQL string.
72+
// 1-indexed
73+
var i = 1
74+
for (message in partition) {
75+
// Airbyte Raw ID
76+
statement.setString(i++, uuidSupplier.get().toString())
9377

94-
jdbcDatabase.execute(
95-
CheckedConsumer { connection: Connection ->
78+
// Message Data
79+
statement.setString(i++, message.serialized)
9680

97-
// Strategy: We want to use PreparedStatement because it handles binding values to
98-
// the SQL query
99-
// (e.g. handling formatting timestamps). A PreparedStatement statement is created
100-
// by supplying the
101-
// full SQL string at creation time. Then subsequently specifying which values are
102-
// bound to the
103-
// string. Thus there will be two loops below.
104-
// 1) Loop over records to build the full string.
105-
// 2) Loop over the records and bind the appropriate values to the string.
106-
// We also partition the query to run on 10k records at a time, since some DBs set a
107-
// max limit on
108-
// how many records can be inserted at once
109-
// TODO(sherif) this should use a smarter, destination-aware partitioning scheme
110-
// instead of 10k by
111-
// default
112-
for (partition in Iterables.partition(records, 10000)) {
113-
val sql = StringBuilder(insertQueryComponent)
114-
partition.forEach(
115-
Consumer { r: PartialAirbyteMessage? -> sql.append(recordQueryComponent) }
81+
// Extracted At
82+
statement.setTimestamp(
83+
i++,
84+
Timestamp.from(Instant.ofEpochMilli(message.record!!.emittedAt))
11685
)
117-
val s = sql.toString()
118-
val s1 = s.substring(0, s.length - 2) + (if (sem) ";" else "")
119-
120-
connection.prepareStatement(s1).use { statement ->
121-
// second loop: bind values to the SQL string.
122-
// 1-indexed
123-
var i = 1
124-
for (message in partition) {
125-
// Airbyte Raw ID
126-
statement.setString(i, uuidSupplier.get().toString())
127-
i++
12886

129-
// Message Data
130-
statement.setString(i, message.serialized)
131-
i++
132-
133-
// Extracted At
134-
statement.setTimestamp(
135-
i,
136-
Timestamp.from(Instant.ofEpochMilli(message.record!!.emittedAt))
87+
if (isDestinationV2) {
88+
// Loaded At
89+
statement.setTimestamp(i++, null)
90+
// Airbyte Meta
91+
if (message.record!!.meta != null) {
92+
statement.setString(
93+
i++,
94+
objectMapper.writeValueAsString(message.record!!.meta)
13795
)
138-
i++
139-
140-
if (isDestinationV2) {
141-
// Loaded At
142-
statement.setTimestamp(i, null)
143-
i++
144-
}
96+
} else {
97+
statement.setNull(i++, nullSqlWideType)
14598
}
146-
statement.execute()
14799
}
148100
}
101+
statement.execute()
149102
}
150-
)
103+
}
151104
}
152105
}

0 commit comments

Comments
 (0)