Skip to content

Commit de56d47

Browse files
marcosmarxm, jzcruiser, edgao, ChristopheDuong
authored
Publish PR 9029: clickhouse normalization (#9072)
* add normalization-clickhouse docker build step
* bump normalization version
* small changes gradle
* fix settings gradle
* fix eof file
* correct clickhouse normalization
* Refactor jinja template for scd (#9278)
* merge chris code and regenerate sql files

Co-authored-by: James Zhao <[email protected]>
Co-authored-by: Edward Gao <[email protected]>
Co-authored-by: Christophe Duong <[email protected]>
1 parent bd71999 commit de56d47

File tree

46 files changed

+254
-601
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+254
-601
lines changed

airbyte-integrations/bases/base-normalization/.dockerignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@
88
!dbt-project-template-mssql
99
!dbt-project-template-mysql
1010
!dbt-project-template-oracle
11+
!dbt-project-template-clickhouse

airbyte-integrations/bases/base-normalization/Dockerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,5 +28,5 @@ WORKDIR /airbyte
2828
ENV AIRBYTE_ENTRYPOINT "/airbyte/entrypoint.sh"
2929
ENTRYPOINT ["/airbyte/entrypoint.sh"]
3030

31-
LABEL io.airbyte.version=0.1.61
31+
LABEL io.airbyte.version=0.1.62
3232
LABEL io.airbyte.name=airbyte/normalization

airbyte-integrations/bases/base-normalization/build.gradle

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ task airbyteDockerOracle(type: Exec, dependsOn: checkSshScriptCopy) {
6969
configure buildAirbyteDocker('oracle')
7070
dependsOn assemble
7171
}
72+
task airbyteDockerClickhouse(type: Exec, dependsOn: checkSshScriptCopy) {
73+
configure buildAirbyteDocker('clickhouse')
74+
dependsOn assemble
75+
}
7276

7377
airbyteDocker.dependsOn(airbyteDockerMSSql)
7478
airbyteDocker.dependsOn(airbyteDockerMySql)
7579
airbyteDocker.dependsOn(airbyteDockerOracle)
80+
airbyteDocker.dependsOn(airbyteDockerClickhouse)
7681

7782
task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) {
7883
module = "pytest"
@@ -86,6 +91,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs
8691
dependsOn ':airbyte-integrations:connectors:destination-snowflake:airbyteDocker'
8792
dependsOn ':airbyte-integrations:connectors:destination-oracle:airbyteDocker'
8893
dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
94+
dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
8995
}
9096

9197
integrationTest.dependsOn("customIntegrationTestPython")

airbyte-integrations/bases/base-normalization/docker-compose.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ services:
1010
image: airbyte/normalization-mysql:${VERSION}
1111
normalization-oracle:
1212
image: airbyte/normalization-oracle:${VERSION}
13+
normalization-clickhouse:
14+
image: airbyte/normalization-clickhouse:${VERSION}

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_cdc_excluded_scd.sql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212

1313
as (
1414

15+
-- depends_on: ref('dedup_cdc_excluded_stg')
1516
with
1617

1718
input_data as (
1819
select *
19-
from _airbyte_test_normalization.dedup_cdc_excluded_ab3
20+
from _airbyte_test_normalization.dedup_cdc_excluded_stg
2021
-- dedup_cdc_excluded from test_normalization._airbyte_raw_dedup_cdc_excluded
2122
),
2223

@@ -45,15 +46,15 @@ scd_data as (
4546
_ab_cdc_updated_at,
4647
_ab_cdc_deleted_at,
4748
_airbyte_emitted_at as _airbyte_start_at,
49+
case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row,
4850
anyOrNull(_airbyte_emitted_at) over (
4951
partition by id
5052
order by
5153
_airbyte_emitted_at is null asc,
5254
_airbyte_emitted_at desc,
5355
_airbyte_emitted_at desc, _ab_cdc_updated_at desc
54-
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
56+
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
5557
) as _airbyte_end_at,
56-
case when _airbyte_active_row_num = 1 and _ab_cdc_deleted_at is null then 1 else 0 end as _airbyte_active_row,
5758
_airbyte_ab_id,
5859
_airbyte_emitted_at,
5960
_airbyte_dedup_cdc_excluded_hashid
@@ -65,7 +66,7 @@ dedup_data as (
6566
-- additionally, we generate a unique key for the scd table
6667
row_number() over (
6768
partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at, accurateCastOrNull(_ab_cdc_deleted_at, 'String'), accurateCastOrNull(_ab_cdc_updated_at, 'String')
68-
order by _airbyte_ab_id
69+
order by _airbyte_active_row desc, _airbyte_ab_id
6970
) as _airbyte_row_num,
7071
assumeNotNull(hex(MD5(
7172

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/dedup_exchange_rate_scd.sql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212

1313
as (
1414

15+
-- depends_on: ref('dedup_exchange_rate_stg')
1516
with
1617

1718
input_data as (
1819
select *
19-
from _airbyte_test_normalization.dedup_exchange_rate_ab3
20+
from _airbyte_test_normalization.dedup_exchange_rate_stg
2021
-- dedup_exchange_rate from test_normalization._airbyte_raw_dedup_exchange_rate
2122
),
2223

@@ -54,15 +55,15 @@ scd_data as (
5455
NZD,
5556
USD,
5657
date as _airbyte_start_at,
58+
case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row,
5759
anyOrNull(date) over (
5860
partition by id, currency, cast(NZD as String)
5961
order by
6062
date is null asc,
6163
date desc,
6264
_airbyte_emitted_at desc
63-
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
65+
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
6466
) as _airbyte_end_at,
65-
case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row,
6667
_airbyte_ab_id,
6768
_airbyte_emitted_at,
6869
_airbyte_dedup_exchange_rate_hashid
@@ -74,7 +75,7 @@ dedup_data as (
7475
-- additionally, we generate a unique key for the scd table
7576
row_number() over (
7677
partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
77-
order by _airbyte_ab_id
78+
order by _airbyte_active_row desc, _airbyte_ab_id
7879
) as _airbyte_row_num,
7980
assumeNotNull(hex(MD5(
8081

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/scd/test_normalization/renamed_dedup_cdc_excluded_scd.sql

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,12 @@
1212

1313
as (
1414

15+
-- depends_on: ref('renamed_dedup_cdc_excluded_stg')
1516
with
1617

1718
input_data as (
1819
select *
19-
from _airbyte_test_normalization.renamed_dedup_cdc_excluded_ab3
20+
from _airbyte_test_normalization.renamed_dedup_cdc_excluded_stg
2021
-- renamed_dedup_cdc_excluded from test_normalization._airbyte_raw_renamed_dedup_cdc_excluded
2122
),
2223

@@ -41,15 +42,15 @@ scd_data as (
4142
))) as _airbyte_unique_key,
4243
id,
4344
_airbyte_emitted_at as _airbyte_start_at,
45+
case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row,
4446
anyOrNull(_airbyte_emitted_at) over (
4547
partition by id
4648
order by
4749
_airbyte_emitted_at is null asc,
4850
_airbyte_emitted_at desc,
4951
_airbyte_emitted_at desc
50-
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
52+
ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING
5153
) as _airbyte_end_at,
52-
case when _airbyte_active_row_num = 1 then 1 else 0 end as _airbyte_active_row,
5354
_airbyte_ab_id,
5455
_airbyte_emitted_at,
5556
_airbyte_renamed_dedup_cdc_excluded_hashid
@@ -61,7 +62,7 @@ dedup_data as (
6162
-- additionally, we generate a unique key for the scd table
6263
row_number() over (
6364
partition by _airbyte_unique_key, _airbyte_start_at, _airbyte_emitted_at
64-
order by _airbyte_ab_id
65+
order by _airbyte_active_row desc, _airbyte_ab_id
6566
) as _airbyte_row_num,
6667
assumeNotNull(hex(MD5(
6768

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
as (
1414

1515
-- Final base SQL model
16+
-- depends_on: test_normalization.dedup_cdc_excluded_scd
1617
select
1718
_airbyte_unique_key,
1819
id,

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_exchange_rate.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
as (
1414

1515
-- Final base SQL model
16+
-- depends_on: test_normalization.dedup_exchange_rate_scd
1617
select
1718
_airbyte_unique_key,
1819
id,

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/renamed_dedup_cdc_excluded.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
as (
1414

1515
-- Final base SQL model
16+
-- depends_on: test_normalization.renamed_dedup_cdc_excluded_scd
1617
select
1718
_airbyte_unique_key,
1819
id,

0 commit comments

Comments
 (0)