Skip to content

Commit c5d4a97

Browse files
🐛 Fix normalization issue with quoted & case sensitive columns (#9317)
1 parent e0bac4a commit c5d4a97

File tree

43 files changed

+1030
-267
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

43 files changed

+1030
-267
lines changed

airbyte-integrations/bases/base-normalization/.gitignore

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ integration_tests/normalization_test_output/**/*.yml
20 20
# Simple Streams
21 21
!integration_tests/normalization_test_output/**/dedup_exchange_rate*.sql
22 22
!integration_tests/normalization_test_output/**/exchange_rate.sql
23+
!integration_tests/normalization_test_output/**/test_simple_streams/first_output/airbyte_views/**/multiple_column_names_conflicts_stg.sql
23 24
# Nested Streams
24 25
# Parent table
25 26
!integration_tests/normalization_test_output/**/nested_stream_with*_names_ab*.sql
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,83 @@
1+
2+
3+
create or replace view `dataline-integration-testing`._airbyte_test_normalization.`multiple_column_names_conflicts_stg`
4+
OPTIONS()
5+
as
6+
with __dbt__cte__multiple_column_names_conflicts_ab1 as (
7+
8+
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
9+
-- depends_on: `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts
10+
select
11+
json_extract_scalar(_airbyte_data, "$['id']") as id,
12+
json_extract_scalar(_airbyte_data, "$['User Id']") as User_Id,
13+
json_extract_scalar(_airbyte_data, "$['user_id']") as user_id_1,
14+
json_extract_scalar(_airbyte_data, "$['User id']") as User_id_2,
15+
json_extract_scalar(_airbyte_data, "$['user id']") as user_id_3,
16+
json_extract_scalar(_airbyte_data, "$['User@Id']") as User_Id_4,
17+
json_extract_scalar(_airbyte_data, "$['UserId']") as UserId,
18+
_airbyte_ab_id,
19+
_airbyte_emitted_at,
20+
CURRENT_TIMESTAMP() as _airbyte_normalized_at
21+
from `dataline-integration-testing`.test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias
22+
-- multiple_column_names_conflicts
23+
where 1 = 1
24+
25+
), __dbt__cte__multiple_column_names_conflicts_ab2 as (
26+
27+
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
28+
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1
29+
select
30+
cast(id as
31+
int64
32+
) as id,
33+
cast(User_Id as
34+
string
35+
) as User_Id,
36+
cast(user_id_1 as
37+
float64
38+
) as user_id_1,
39+
cast(User_id_2 as
40+
float64
41+
) as User_id_2,
42+
cast(user_id_3 as
43+
float64
44+
) as user_id_3,
45+
cast(User_Id_4 as
46+
string
47+
) as User_Id_4,
48+
cast(UserId as
49+
float64
50+
) as UserId,
51+
_airbyte_ab_id,
52+
_airbyte_emitted_at,
53+
CURRENT_TIMESTAMP() as _airbyte_normalized_at
54+
from __dbt__cte__multiple_column_names_conflicts_ab1
55+
-- multiple_column_names_conflicts
56+
where 1 = 1
57+
58+
)-- SQL model to build a hash column based on the values of this record
59+
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2
60+
select
61+
to_hex(md5(cast(concat(coalesce(cast(id as
62+
string
63+
), ''), '-', coalesce(cast(User_Id as
64+
string
65+
), ''), '-', coalesce(cast(user_id_1 as
66+
string
67+
), ''), '-', coalesce(cast(User_id_2 as
68+
string
69+
), ''), '-', coalesce(cast(user_id_3 as
70+
string
71+
), ''), '-', coalesce(cast(User_Id_4 as
72+
string
73+
), ''), '-', coalesce(cast(UserId as
74+
string
75+
), '')) as
76+
string
77+
))) as _airbyte_multiple_column_names_conflicts_hashid,
78+
tmp.*
79+
from __dbt__cte__multiple_column_names_conflicts_ab2 tmp
80+
-- multiple_column_names_conflicts
81+
where 1 = 1
82+
;
83+

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/bigquery/test_simple_streams/models/generated/sources.yml

+1
Original file line number | Diff line number | Diff line change
@@ -9,5 +9,6 @@ sources:
9 9
- name: _airbyte_raw_dedup_cdc_excluded
10 10
- name: _airbyte_raw_dedup_exchange_rate
11 11
- name: _airbyte_raw_exchange_rate
12+
- name: _airbyte_raw_multiple_column_names_conflicts
12 13
- name: _airbyte_raw_pos_dedup_cdcx
13 14
- name: _airbyte_raw_renamed_dedup_cdc_excluded

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/first_output/airbyte_incremental/test_normalization/dedup_cdc_excluded.sql

-34
This file was deleted.
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,86 @@
1+
2+
3+
create view _airbyte_test_normalization.multiple_column_names_conflicts_stg__dbt_tmp
4+
5+
as (
6+
7+
with __dbt__cte__multiple_column_names_conflicts_ab1 as (
8+
9+
-- SQL model to parse JSON blob stored in a single column and extract into separated field columns as described by the JSON Schema
10+
-- depends_on: test_normalization._airbyte_raw_multiple_column_names_conflicts
11+
select
12+
JSONExtractRaw(_airbyte_data, 'id') as id,
13+
JSONExtractRaw(_airbyte_data, 'User Id') as "User Id",
14+
JSONExtractRaw(_airbyte_data, 'user_id') as user_id,
15+
JSONExtractRaw(_airbyte_data, 'User id') as "User id",
16+
JSONExtractRaw(_airbyte_data, 'user id') as "user id",
17+
JSONExtractRaw(_airbyte_data, 'User@Id') as "User@Id",
18+
JSONExtractRaw(_airbyte_data, 'UserId') as UserId,
19+
_airbyte_ab_id,
20+
_airbyte_emitted_at,
21+
now() as _airbyte_normalized_at
22+
from test_normalization._airbyte_raw_multiple_column_names_conflicts as table_alias
23+
-- multiple_column_names_conflicts
24+
where 1 = 1
25+
26+
), __dbt__cte__multiple_column_names_conflicts_ab2 as (
27+
28+
-- SQL model to cast each column to its adequate SQL type converted from the JSON schema type
29+
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab1
30+
select
31+
accurateCastOrNull(id, '
32+
BIGINT
33+
') as id,
34+
nullif(accurateCastOrNull(trim(BOTH '"' from "User Id"), 'String'), 'null') as "User Id",
35+
accurateCastOrNull(user_id, '
36+
Float64
37+
') as user_id,
38+
accurateCastOrNull("User id", '
39+
Float64
40+
') as "User id",
41+
accurateCastOrNull("user id", '
42+
Float64
43+
') as "user id",
44+
nullif(accurateCastOrNull(trim(BOTH '"' from "User@Id"), 'String'), 'null') as "User@Id",
45+
accurateCastOrNull(UserId, '
46+
Float64
47+
') as UserId,
48+
_airbyte_ab_id,
49+
_airbyte_emitted_at,
50+
now() as _airbyte_normalized_at
51+
from __dbt__cte__multiple_column_names_conflicts_ab1
52+
-- multiple_column_names_conflicts
53+
where 1 = 1
54+
55+
)-- SQL model to build a hash column based on the values of this record
56+
-- depends_on: __dbt__cte__multiple_column_names_conflicts_ab2
57+
select
58+
assumeNotNull(hex(MD5(
59+
60+
toString(id) || '~' ||
61+
62+
63+
toString("User Id") || '~' ||
64+
65+
66+
toString(user_id) || '~' ||
67+
68+
69+
toString("User id") || '~' ||
70+
71+
72+
toString("user id") || '~' ||
73+
74+
75+
toString("User@Id") || '~' ||
76+
77+
78+
toString(UserId)
79+
80+
))) as _airbyte_multiple_co__ames_conflicts_hashid,
81+
tmp.*
82+
from __dbt__cte__multiple_column_names_conflicts_ab2 tmp
83+
-- multiple_column_names_conflicts
84+
where 1 = 1
85+
86+
)

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab1.sql

-21
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/dedup_cdc_excluded_ab2.sql

-21
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab1.sql

-24
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab2.sql

-24
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/exchange_rate_ab3.sql

-24
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab1.sql

-22
This file was deleted.

airbyte-integrations/bases/base-normalization/integration_tests/normalization_test_output/clickhouse/test_simple_streams/models/generated/airbyte_ctes/test_normalization/pos_dedup_cdcx_ab2.sql

-22
This file was deleted.

0 commit comments

Comments
 (0)