Commit 0ce6f03

sspaeti authored and danidelvalle-frontiers committed
🎉 New Destination: DuckDB (airbytehq#17494)
This is the first version of the DuckDB destination. There are still potential edge cases that need to be handled, but we are looking forward to your feedback.
1 parent 83fa6f8 commit 0ce6f03
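
For context, the destination writes Airbyte records into a single DuckDB database file at the configured destination_path. The snippet below is a minimal, hypothetical sketch of that idea using the duckdb Python package; the table and column names follow Airbyte's usual raw-table layout (_airbyte_raw_<stream> with _airbyte_ab_id, _airbyte_emitted_at, _airbyte_data), but this is illustrative and not the connector's actual implementation.

import datetime
import json
import uuid

import duckdb

# destination_path from the connector config; only /local/... paths are mounted into the container
con = duckdb.connect("/local/destination.duckdb")
con.execute(
    """
    CREATE TABLE IF NOT EXISTS _airbyte_raw_users (
        _airbyte_ab_id VARCHAR,
        _airbyte_emitted_at TIMESTAMP,
        _airbyte_data VARCHAR  -- raw record kept as a JSON string
    )
    """
)
record = {"id": 1, "name": "alice"}
con.execute(
    "INSERT INTO _airbyte_raw_users VALUES (?, ?, ?)",
    [str(uuid.uuid4()), datetime.datetime.utcnow(), json.dumps(record)],
)
con.close()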

51 files changed: +1717 -8 lines
airbyte-config/init/src/main/resources/seed/destination_definitions.yaml

Lines changed: 12 additions & 0 deletions
@@ -443,3 +443,15 @@
   dockerImageTag: 0.1.0
   documentationUrl: https://docs.airbyte.com/integrations/destinations/weaviate
   releaseStage: alpha
+- name: DuckDB
+  destinationDefinitionId: 94bd199c-2ff0-4aa2-b98e-17f0acb72610
+  dockerRepository: airbyte/destination-duckdb
+  dockerImageTag: 0.1.0
+  documentationUrl: https://docs.airbyte.io/integrations/destinations/duckdb
+  icon: duckdb.svg
+  normalizationConfig:
+    normalizationRepository: airbyte/normalization-duckdb
+    normalizationTag: 0.2.25
+    normalizationIntegrationType: duckdb
+  supportsDbt: true
+  releaseStage: alpha

airbyte-config/init/src/main/resources/seed/destination_specs.yaml

Lines changed: 27 additions & 0 deletions
@@ -7347,3 +7347,30 @@
     supported_destination_sync_modes:
     - "append"
     - "overwrite"
+- dockerImage: "airbyte/destination-duckdb:0.1.0"
+  spec:
+    documentationUrl: "https://docs.airbyte.io/integrations/destinations/duckdb"
+    connectionSpecification:
+      $schema: "http://json-schema.org/draft-07/schema#"
+      title: "DuckDB Destination Spec"
+      type: "object"
+      required:
+      - "destination_path"
+      additionalProperties: false
+      properties:
+        destination_path:
+          type: "string"
+          description: "Path to the destination.duckdb file. The file will be placed\
+            \ inside that local mount. For more information check out our <a href=\"\
+            https://docs.airbyte.io/integrations/destinations/duckdb\">docs</a>"
+          example: "/local/destination.duckdb"
+        schema:
+          type: "string"
+          description: "database schema, default for duckdb is main"
+          example: "main"
+    supportsIncremental: true
+    supportsNormalization: true
+    supportsDBT: true
+    supported_destination_sync_modes:
+    - "overwrite"
+    - "append"

airbyte-integrations/bases/base-normalization/build.gradle

Lines changed: 7 additions & 0 deletions
@@ -81,6 +81,10 @@ task airbyteDockerTiDB(type: Exec, dependsOn: checkSshScriptCopy) {
     configure buildAirbyteDocker('tidb')
     dependsOn assemble
 }
+task airbyteDockerDuckDB(type: Exec, dependsOn: checkSshScriptCopy) {
+    configure buildAirbyteDocker('duckdb')
+    dependsOn assemble
+}

 airbyteDocker.dependsOn(airbyteDockerMSSql)
 airbyteDocker.dependsOn(airbyteDockerMySql)
@@ -89,6 +93,7 @@ airbyteDocker.dependsOn(airbyteDockerClickhouse)
 airbyteDocker.dependsOn(airbyteDockerSnowflake)
 airbyteDocker.dependsOn(airbyteDockerRedshift)
 airbyteDocker.dependsOn(airbyteDockerTiDB)
+airbyteDocker.dependsOn(airbyteDockerDuckDB)

 task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) {
     module = "pytest"
@@ -104,6 +109,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs
     dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
     dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
     dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'
+    dependsOn ':airbyte-integrations:connectors:destination-duckdb:airbyteDocker'
 }

 // not really sure what this task does differently from customIntegrationTestPython, but it seems to also run integration tests
@@ -118,6 +124,7 @@ project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-
 project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
 project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
 project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'
+project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-duckdb:airbyteDocker'

 // DATs have some additional tests that exercise normalization code paths,
 // so we want to run these in addition to the base-normalization integration tests.
Lines changed: 63 additions & 0 deletions
@@ -0,0 +1,63 @@
+# This file is necessary to install dbt-utils with dbt deps
+# the content will be overwritten by the transform function
+
+# Name your package! Package names should contain only lowercase characters
+# and underscores. A good package name should reflect your organization's
+# name or the intended use of these models
+name: "airbyte_utils"
+version: "1.0"
+config-version: 2
+
+# This setting configures which "profile" dbt uses for this project. Profiles contain
+# database connection information, and should be configured in the ~/.dbt/profiles.yml file
+profile: "normalize"
+
+# These configurations specify where dbt should look for different types of files.
+# The `model-paths` config, for example, states that source models can be found
+# in the "models/" directory. You probably won't need to change these!
+model-paths: ["models"]
+docs-paths: ["docs"]
+analysis-paths: ["analysis"]
+test-paths: ["tests"]
+seed-paths: ["data"]
+macro-paths: ["macros"]
+
+target-path: "../build" # directory which will store compiled SQL files
+log-path: "../logs" # directory which will store DBT logs
+packages-install-path: "/dbt" # directory which will store external DBT dependencies
+
+clean-targets: # directories to be removed by `dbt clean`
+  - "build"
+  - "dbt_modules"
+
+quoting:
+  database: true
+  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
+  # all schemas should be unquoted
+  schema: false
+  identifier: true
+
+# You can define configurations for models in the `model-paths` directory here.
+# Using these configurations, you can enable or disable models, change how they
+# are materialized, and more!
+models:
+  airbyte_utils:
+    +materialized: table
+    generated:
+      airbyte_ctes:
+        +tags: airbyte_internal_cte
+        +materialized: ephemeral
+      airbyte_incremental:
+        +tags: incremental_tables
+        +materialized: incremental
+        +on_schema_change: sync_all_columns
+      airbyte_tables:
+        +tags: normalized_tables
+        +materialized: table
+      airbyte_views:
+        +tags: airbyte_internal_views
+        +materialized: view
+
+dispatch:
+  - macro_namespace: dbt_utils
+    search_order: ["airbyte_utils", "dbt_utils"]
Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+# add dependencies. these will get pulled during the `dbt deps` process.
+
+packages:
+  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
+    revision: 0.8.2

airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/array.sql

Lines changed: 12 additions & 0 deletions
@@ -46,6 +46,10 @@
     left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
 {%- endmacro %}

+{% macro duckdb__cross_join_unnest(stream_name, array_col) -%}
+    left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
+{%- endmacro %}
+
 {% macro redshift__cross_join_unnest(stream_name, array_col) -%}
     left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
 {%- endmacro %}
@@ -95,6 +99,10 @@
     _airbyte_nested_data
 {%- endmacro %}

+{% macro duckdb__unnested_column_value(column_col) -%}
+    _airbyte_nested_data
+{%- endmacro %}
+
 {% macro oracle__unnested_column_value(column_col) -%}
     {{ column_col }}
 {%- endmacro %}
@@ -193,3 +201,7 @@ joined as (
 {% macro tidb__unnest_cte(from_table, stream_name, column_col) -%}
     {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
 {%- endmacro %}
+
+{% macro duckdb__unnest_cte(from_table, stream_name, column_col) -%}
+    {{ mysql__unnest_cte(from_table, stream_name, column_col) }}
+{%- endmacro %}

airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/concat.sql

Lines changed: 4 additions & 0 deletions
@@ -30,3 +30,7 @@
 {% macro tidb__concat(fields) -%}
     concat({{ fields|join(', ') }})
 {%- endmacro %}
+
+{% macro duckdb__concat(fields) -%}
+    concat({{ fields|join(', ') }})
+{%- endmacro %}
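
DuckDB accepts the same variadic concat(...) call as MySQL and TiDB, which is why the new macro is a copy of their implementation. A quick sketch of that behavior, assuming the duckdb Python package:

import duckdb

print(duckdb.connect().execute("SELECT concat('duck', 'db', '!')").fetchone())  # expected: ('duckdb!',)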

airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/datatypes.sql

Lines changed: 45 additions & 0 deletions
@@ -44,6 +44,9 @@
     json
 {%- endmacro -%}

+{%- macro duckdb__type_json() -%}
+    json
+{%- endmacro -%}

 {# string ------------------------------------------------- #}

@@ -72,6 +75,10 @@
     char(1000)
 {%- endmacro -%}

+{%- macro duckdb__type_string() -%}
+    VARCHAR
+{%- endmacro -%}
+
 {# float ------------------------------------------------- #}
 {% macro mysql__type_float() %}
     float
@@ -89,6 +96,10 @@
     float
 {% endmacro %}

+{% macro duckdb__type_float() %}
+    DOUBLE
+{% endmacro %}
+
 {# int ------------------------------------------------- #}
 {% macro default__type_int() %}
     int
@@ -110,6 +121,10 @@
     signed
 {% endmacro %}

+{% macro duckdb__type_int() %}
+    INTEGER
+{% endmacro %}
+
 {# bigint ------------------------------------------------- #}
 {% macro mysql__type_bigint() %}
     signed
@@ -127,6 +142,10 @@
     signed
 {% endmacro %}

+{% macro duckdb__type_bigint() %}
+    BIGINT
+{% endmacro %}
+
 {# numeric ------------------------------------------------- --#}
 {% macro mysql__type_numeric() %}
     float
@@ -140,6 +159,10 @@
     float
 {% endmacro %}

+{% macro duckdb__type_numeric() %}
+    DOUBLE
+{% endmacro %}
+
 {# very_large_integer --------------------------------------- --#}
 {#
 Most databases don't have a true unbounded numeric datatype, so we use a really big numeric field.
@@ -170,6 +193,10 @@ so this macro needs to be called very_large_integer.
     decimal(38, 0)
 {% endmacro %}

+{% macro duckdb__type_very_large_integer() %}
+    DECIMAL(38, 0)
+{% endmacro %}
+
 {# timestamp ------------------------------------------------- --#}
 {% macro mysql__type_timestamp() %}
     time
@@ -189,6 +216,10 @@ so this macro needs to be called very_large_integer.
     time
 {% endmacro %}

+{% macro duckdb__type_timestamp() %}
+    TIMESTAMP
+{% endmacro %}
+
 {# timestamp with time zone ------------------------------------------------- #}

 {%- macro type_timestamp_with_timezone() -%}
@@ -229,6 +260,10 @@ so this macro needs to be called very_large_integer.
     char(1000)
 {%- endmacro -%}

+{%- macro duckdb__type_timestamp_with_timezone() -%}
+    TIMESTAMPTZ
+{%- endmacro -%}
+
 {# timestamp without time zone ------------------------------------------------- #}

 {%- macro type_timestamp_without_timezone() -%}
@@ -261,6 +296,10 @@ so this macro needs to be called very_large_integer.
     datetime
 {% endmacro %}

+{% macro duckdb__type_timestamp_without_timezone() %}
+    TIMESTAMP
+{% endmacro %}
+
 {# time without time zone ------------------------------------------------- #}

 {%- macro type_time_without_timezone() -%}
@@ -287,6 +326,9 @@ so this macro needs to be called very_large_integer.
     time
 {% endmacro %}

+{% macro duckdb__type_time_without_timezone() %}
+    TIMESTAMP
+{% endmacro %}

 {# time with time zone ------------------------------------------------- #}

@@ -330,6 +372,9 @@ so this macro needs to be called very_large_integer.
     char(1000)
 {%- endmacro -%}

+{%- macro duckdb__type_time_with_timezone() -%}
+    TIMESTAMPTZ
+{%- endmacro -%}
 {# date ------------------------------------------------- #}

 {%- macro type_date() -%}
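
The macros above map Airbyte's logical types to DuckDB column types (VARCHAR, DOUBLE, INTEGER, BIGINT, DECIMAL(38, 0), TIMESTAMP, TIMESTAMPTZ). A hedged sanity check that these names are valid DuckDB DDL; the table and column names are made up for illustration:

import duckdb

con = duckdb.connect()  # in-memory database
con.execute(
    """
    CREATE TABLE normalization_type_check (
        s  VARCHAR,         -- duckdb__type_string
        f  DOUBLE,          -- duckdb__type_float / duckdb__type_numeric
        i  INTEGER,         -- duckdb__type_int
        b  BIGINT,          -- duckdb__type_bigint
        v  DECIMAL(38, 0),  -- duckdb__type_very_large_integer
        ts TIMESTAMP,       -- duckdb__type_timestamp (also used for times without a zone)
        tz TIMESTAMPTZ      -- duckdb__type_timestamp_with_timezone / ..._time_with_timezone
    )
    """
)
print(con.execute("DESCRIBE normalization_type_check").fetchall())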

airbyte-integrations/bases/base-normalization/dbt-project-template/macros/cross_db_utils/json_operations.sql

Lines changed: 22 additions & 0 deletions
@@ -7,6 +7,7 @@
 - MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
 - ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
 - TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
+- DuckDB: json_extract(json, 'path') note: If path is a LIST, the result will be a LIST of JSON -> https://duckdb.org/docs/extensions/json
 #}

 {# format_json_path -------------------------------------------------- #}
@@ -103,6 +104,11 @@
     {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
 {%- endmacro %}

+{% macro duckdb__format_json_path(json_path_list) -%}
+    {# -- '$."x"."y"."z"' #}
+    {{ "'$.\"" ~ json_path_list|join(".") ~ "\"'" }}
+{%- endmacro %}
+
 {# json_extract ------------------------------------------------- #}

 {% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
@@ -180,6 +186,14 @@
     {% endif -%}
 {%- endmacro %}

+{% macro duckdb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
+    {%- if from_table|string() == '' %}
+        json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
+    {% else %}
+        json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
+    {% endif -%}
+{%- endmacro %}
+
 {# json_extract_scalar ------------------------------------------------- #}

 {% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
@@ -234,6 +248,10 @@
     )
 {%- endmacro %}

+{% macro duckdb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
+    json_extract_string({{ json_column }}, {{ format_json_path(json_path_list) }})
+{%- endmacro %}
+
 {# json_extract_array ------------------------------------------------- #}

 {% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
@@ -284,6 +302,10 @@
     json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
 {%- endmacro %}

+{% macro duckdb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
+    json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
+{%- endmacro %}
+
 {# json_extract_string_array ------------------------------------------------- #}

 {% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
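
The DuckDB overrides above render to json_extract and json_extract_string with paths produced by duckdb__format_json_path (the '$."x"."y"' form). Below is a hedged demo of what that rendered SQL returns, assuming the duckdb Python package (the json extension ships with it; the sample document is made up):

import duckdb

con = duckdb.connect()
# con.execute("INSTALL json; LOAD json;")  # only needed if the extension is not auto-loaded
row = con.execute(
    """
    SELECT
        json_extract('{"user": {"name": "alice"}}', '$."user"."name"')        AS as_json,
        json_extract_string('{"user": {"name": "alice"}}', '$."user"."name"') AS as_text
    """
).fetchone()
print(row)  # expected: ('"alice"', 'alice') -- json_extract keeps JSON quoting, the _string variant unwraps it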
