5
5
package io.airbyte.integrations.destination.bigquery.operation
6
6
7
7
import com.google.cloud.bigquery.BigQuery
8
+ import com.google.cloud.bigquery.QueryJobConfiguration
8
9
import com.google.cloud.bigquery.TableId
10
+ import com.google.cloud.bigquery.TableResult
9
11
import io.airbyte.integrations.base.destination.operation.StorageOperation
10
12
import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig
11
13
import io.airbyte.integrations.base.destination.typing_deduping.StreamId
@@ -14,10 +16,9 @@ import io.airbyte.integrations.destination.bigquery.BigQueryUtils
14
16
import io.airbyte.integrations.destination.bigquery.formatter.BigQueryRecordFormatter
15
17
import io.airbyte.integrations.destination.bigquery.typing_deduping.BigQueryDestinationHandler
16
18
import io.airbyte.integrations.destination.bigquery.typing_deduping.BigQuerySqlGenerator
17
- import io.airbyte.protocol.models.v0.DestinationSyncMode
18
19
import io.github.oshai.kotlinlogging.KotlinLogging
19
20
import java.time.Instant
20
- import java.util.*
21
+ import java.util.Optional
21
22
import java.util.concurrent.ConcurrentHashMap
22
23
23
24
private val log = KotlinLogging .logger {}
@@ -29,39 +30,83 @@ abstract class BigQueryStorageOperation<Data>(
29
30
protected val datasetLocation : String
30
31
) : StorageOperation<Data> {
31
32
private val existingSchemas = ConcurrentHashMap .newKeySet<String >()
32
/**
 * Prepares the staging table addressed by [streamId] and [suffix].
 *
 * When [replace] is true, truncateStagingTable is invoked; otherwise createStagingTable is
 * invoked.
 */
override fun prepareStage(streamId: StreamId, suffix: String, replace: Boolean) {
    // The replace path does drop-create, so an explicit create can be skipped there.
    when {
        replace -> truncateStagingTable(streamId, suffix)
        else -> createStagingTable(streamId, suffix)
    }
}
40
41
41
- private fun createStagingTable (streamId : StreamId ) {
42
- val tableId = TableId .of(streamId.rawNamespace, streamId.rawName)
42
/**
 * Replaces the un-suffixed raw table of [streamId] with the table named with [suffix].
 *
 * The un-suffixed table is deleted first, then the suffixed table is renamed to the
 * un-suffixed name. These are two separate BigQuery calls.
 *
 * @param streamId identifies the raw namespace and raw table name.
 * @param suffix suffix of the table that should take over the un-suffixed name.
 */
override fun overwriteStage(streamId: StreamId, suffix: String) {
    // The suffix must be the empty string here: any padding would address a different table
    // than the one the rename below needs to be gone.
    bigquery.delete(tableId(streamId, ""))
    // Identifiers are backtick-quoted with no whitespace inside the quotes, so the quoted
    // names match the actual table names exactly.
    bigquery.query(
        QueryJobConfiguration.of(
            """ALTER TABLE `${streamId.rawNamespace}`.`${streamId.rawName}$suffix` RENAME TO `${streamId.rawName}`""",
        ),
    )
}
50
+
51
/**
 * Appends every row of the suffixed table into the un-suffixed raw table of [streamId], then
 * deletes the suffixed table.
 *
 * The transfer is an `INSERT INTO ... SELECT *` query.
 *
 * @param streamId identifies the raw namespace and raw table name.
 * @param suffix suffix of the temporary table whose rows are transferred.
 */
override fun transferFromTempStage(streamId: StreamId, suffix: String) {
    // TODO figure out how to make this work
    // something about incompatible partitioning spec (probably b/c we're copying from a temp
    // table partitioned on generation ID into an old real raw table partitioned on
    // extracted_at)
    val tempRawTable = tableId(streamId, suffix)
    // val jobConf =
    //     CopyJobConfiguration.newBuilder(tableId(streamId, ""), tempRawTable)
    //         .setWriteDisposition(JobInfo.WriteDisposition.WRITE_APPEND)
    //         .build()
    // val job = bigquery.create(JobInfo.of(jobConf))
    // BigQueryUtils.waitForJobFinish(job)

    // Backtick-quoted identifiers must not contain padding whitespace, otherwise the quoted
    // names do not match the real tables.
    bigquery.query(
        QueryJobConfiguration.of(
            """
            INSERT INTO `${streamId.rawNamespace}`.`${streamId.rawName}`
            SELECT * FROM `${streamId.rawNamespace}`.`${streamId.rawName}$suffix`
            """.trimIndent(),
        ),
    )
    // Only reached when the insert query did not throw.
    bigquery.delete(tempRawTable)
}
74
+
75
/**
 * Reads `_airbyte_generation_id` from one row of the table addressed by [streamId] and
 * [suffix].
 *
 * @param streamId identifies the raw namespace and raw table name.
 * @param suffix suffix appended to the raw table name.
 * @return the generation ID of the row returned by the query, or `null` when the query
 * returns no rows.
 */
override fun getStageGeneration(streamId: StreamId, suffix: String): Long? {
    // Identifiers are backtick-quoted, matching the other queries issued by this class.
    val result: TableResult =
        bigquery.query(
            QueryJobConfiguration.of(
                "SELECT _airbyte_generation_id FROM `${streamId.rawNamespace}`.`${streamId.rawName}$suffix` LIMIT 1",
            ),
        )
    // NOTE(review): longValue presumably fails on a SQL NULL cell — confirm that rows without
    // a generation ID cannot reach this path.
    return result.iterateAll().firstOrNull()?.get("_airbyte_generation_id")?.longValue
}
87
+
88
/**
 * Calls BigQueryUtils.createPartitionedTableIfNotExists for the table addressed by [streamId]
 * and [suffix], passing BigQueryRecordFormatter.SCHEMA_V2.
 */
private fun createStagingTable(streamId: StreamId, suffix: String) {
    val stagingTable = tableId(streamId, suffix)
    BigQueryUtils.createPartitionedTableIfNotExists(
        bigquery,
        stagingTable,
        BigQueryRecordFormatter.SCHEMA_V2,
    )
}
49
95
50
/**
 * Deletes the table addressed by [streamId] and [suffix]. The result of the delete call is
 * not inspected.
 */
private fun dropStagingTable(streamId: StreamId, suffix: String) {
    val target = tableId(streamId, suffix)
    bigquery.delete(target)
}
54
99
55
100
/**
 * "Truncates" table, this is a workaround to the issue with TRUNCATE TABLE in BigQuery where
 * the table's partition filter must be turned off to truncate. Since deleting a table is a free
 * operation this option re-uses functions that already exist: the table is dropped and then
 * created again.
 *
 * @param streamId identifies the raw namespace and raw table name.
 * @param suffix suffix appended to the raw table name.
 */
private fun truncateStagingTable(streamId: StreamId, suffix: String) {
    // Build the id with the suffix so the log names the table that is actually dropped and
    // re-created below.
    val stagingTable = tableId(streamId, suffix)
    log.info { "Truncating raw table $stagingTable" }
    dropStagingTable(streamId, suffix)
    createStagingTable(streamId, suffix)
}
66
111
67
112
override fun cleanupStage (streamId : StreamId ) {
@@ -91,7 +136,7 @@ abstract class BigQueryStorageOperation<Data>(
91
136
} "
92
137
}
93
138
destinationHandler.execute(
94
- sqlGenerator.overwriteFinalTable(streamConfig.id, tmpTableSuffix)
139
+ sqlGenerator.overwriteFinalTable(streamConfig.id, tmpTableSuffix),
95
140
)
96
141
}
97
142
}
@@ -109,4 +154,9 @@ abstract class BigQueryStorageOperation<Data>(
109
154
finalTableSuffix,
110
155
)
111
156
}
157
+
158
companion object {
    /**
     * Builds the [TableId] whose dataset is the raw namespace of [streamId] and whose table
     * name is the raw name of [streamId] with [suffix] appended. [suffix] defaults to the
     * empty string.
     */
    fun tableId(streamId: StreamId, suffix: String = ""): TableId {
        val tableName = streamId.rawName + suffix
        return TableId.of(streamId.rawNamespace, tableName)
    }
}
112
162
}
0 commit comments