Skip to content

🎉 New Destination: TiDB #15592

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 34 commits into from
Aug 31, 2022
Merged
Show file tree
Hide file tree
Changes from 24 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
d431d1b
Add new destination-tidb
Daemonxiao Aug 1, 2022
9590e49
support sync
Daemonxiao Aug 3, 2022
a2e4220
Add normalization-tidb
Daemonxiao Aug 10, 2022
f495206
fix failed tests
Daemonxiao Aug 11, 2022
6574536
Add unnest marco
Daemonxiao Aug 11, 2022
e09fbaf
Merge remote-tracking branch 'origin/master' into destination-tidb-co…
Daemonxiao Aug 12, 2022
d69a89a
fmt
Daemonxiao Aug 12, 2022
0a2cb82
Add new destination-tidb
Daemonxiao Aug 1, 2022
2d2689b
support sync
Daemonxiao Aug 3, 2022
b36c97f
Add normalization-tidb
Daemonxiao Aug 10, 2022
d311ecb
fix failed tests
Daemonxiao Aug 11, 2022
f077a16
Add unnest marco
Daemonxiao Aug 11, 2022
b18cbfe
fmt
Daemonxiao Aug 12, 2022
9b7180a
fmt
Daemonxiao Aug 17, 2022
0d316aa
Merge remote-tracking branch 'origin/destination-tidb-connector' into…
Daemonxiao Aug 17, 2022
3777577
Merge branch 'master' into destination-tidb-connector
Daemonxiao Aug 18, 2022
81e2227
fix integration test
Daemonxiao Aug 22, 2022
beb3b1f
Merge remote-tracking branch 'origin/destination-tidb-connector' into…
Daemonxiao Aug 22, 2022
c63bce8
Merge branch 'master' into destination-tidb-connector
Daemonxiao Aug 22, 2022
cfe9b0c
Merge remote-tracking branch 'origin/master' into destination-tidb-co…
Daemonxiao Aug 23, 2022
a27efb3
Merge remote-tracking branch 'origin/destination-tidb-connector' into…
Daemonxiao Aug 23, 2022
0014d77
Update docs/integrations/destinations/tidb.md
Daemonxiao Aug 23, 2022
64efa0c
Update doc
Daemonxiao Aug 23, 2022
f5ec80e
Update doc
Daemonxiao Aug 23, 2022
327c7e2
Update doc
Daemonxiao Aug 24, 2022
3c78e0b
Merge branch 'master' into destination-tidb-connector
Daemonxiao Aug 30, 2022
72c46d6
Merge branch 'master' into destination-tidb-connector
marcosmarxm Aug 30, 2022
2e68328
bump normalization version
marcosmarxm Aug 31, 2022
25e1e51
update normalization changelog
marcosmarxm Aug 31, 2022
11eef5f
run format
marcosmarxm Aug 31, 2022
662deda
Merge branch 'master' into destination-tidb-connector
marcosmarxm Aug 31, 2022
bc12094
Merge branch 'master' into destination-tidb-connector
marcosmarxm Aug 31, 2022
1789170
add dest def
marcosmarxm Aug 31, 2022
ba2de9a
generat spec
marcosmarxm Aug 31, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions airbyte-integrations/bases/base-normalization/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -79,13 +79,18 @@ task airbyteDockerRedshift(type: Exec, dependsOn: checkSshScriptCopy) {
configure buildAirbyteDocker('redshift')
dependsOn assemble
}
// Exec task for the TiDB flavour of the normalization image.
// It is configured by buildAirbyteDocker('tidb') (helper defined elsewhere in this
// build script; presumably it sets up the docker build command -- confirm there).
// Runs only after checkSshScriptCopy and assemble have completed.
task airbyteDockerTiDB(type: Exec, dependsOn: checkSshScriptCopy) {
configure buildAirbyteDocker('tidb')
dependsOn assemble
}

// Make the aggregate airbyteDocker task depend on each destination-specific
// image task, so running airbyteDocker also runs every one listed here.
airbyteDocker.dependsOn(airbyteDockerMSSql)
airbyteDocker.dependsOn(airbyteDockerMySql)
airbyteDocker.dependsOn(airbyteDockerOracle)
airbyteDocker.dependsOn(airbyteDockerClickhouse)
airbyteDocker.dependsOn(airbyteDockerSnowflake)
airbyteDocker.dependsOn(airbyteDockerRedshift)
airbyteDocker.dependsOn(airbyteDockerTiDB)

task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs) {
module = "pytest"
Expand All @@ -100,6 +105,7 @@ task("customIntegrationTestPython", type: PythonTask, dependsOn: installTestReqs
dependsOn ':airbyte-integrations:connectors:destination-oracle:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'
}

// not really sure what this task does differently from customIntegrationTestPython, but it seems to also run integration tests
Expand All @@ -113,6 +119,7 @@ project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-oracle:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-mssql:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-clickhouse:airbyteDocker'
project.tasks.findByName('_customIntegrationTestsCoverage').dependsOn ':airbyte-integrations:connectors:destination-tidb:airbyteDocker'

// DATs have some additional tests that exercise normalization code paths,
// so we want to run these in addition to the base-normalization integration tests.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# This file is necessary to install dbt-utils with dbt deps
# the content will be overwritten by the transform function

# Name your package! Package names should contain only lowercase characters
# and underscores. A good package name should reflect your organization's
# name or the intended use of these models
name: "airbyte_utils"
version: "1.0"
config-version: 2

# This setting configures which "profile" dbt uses for this project. Profiles contain
# database connection information, and should be configured in the ~/.dbt/profiles.yml file
profile: "normalize"

# These configurations specify where dbt should look for different types of files.
# The `model-paths` config, for example, states that source models can be found
# in the "models/" directory. You probably won't need to change these!
model-paths: ["models"]
docs-paths: ["docs"]
analysis-paths: ["analysis"]
test-paths: ["tests"]
seed-paths: ["data"]
macro-paths: ["macros"]

target-path: "../build" # directory which will store compiled SQL files
log-path: "../logs" # directory which will store DBT logs
packages-install-path: "/dbt" # directory which will store external DBT dependencies

clean-targets: # directories to be removed by `dbt clean`
  - "build"
  - "dbt_modules"

quoting:
  database: true
  # Temporarily disabling the behavior of the ExtendedNameTransformer on table/schema names, see (issue #1785)
  # all schemas should be unquoted
  schema: false
  identifier: true

# You can define configurations for models in the `model-paths` directory here.
# Using these configurations, you can enable or disable models, change how they
# are materialized, and more!
models:
  airbyte_utils:
    # Default materialization for every model in this package.
    +materialized: table
    generated:
      # Each sub-folder of generated/ gets its own tag and materialization.
      airbyte_ctes:
        +tags: airbyte_internal_cte
        +materialized: ephemeral
      airbyte_incremental:
        +tags: incremental_tables
        +materialized: incremental
      airbyte_tables:
        +tags: normalized_tables
        +materialized: table
      airbyte_views:
        +tags: airbyte_internal_views
        +materialized: view

vars:
  dbt_utils_dispatch_list: ["airbyte_utils"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# add dependencies. these will get pulled during the `dbt deps` process.

packages:
  # dbt-utils, pinned to a fixed revision; `revision` belongs to the same
  # list item as `git`, so it must be nested under it.
  - git: "https://github.com/fishtown-analytics/dbt-utils.git"
    revision: 0.8.2
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{# TiDB: emits a left join against the relation named `joined`, matching the stream's
   _airbyte_<stream_name>_hashid to joined._airbyte_hashid. array_col is accepted but not
   referenced. `joined` is presumably the CTE produced by the unnest_cte macro; confirm. #}
{% macro tidb__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}

{# Redshift: emits a left join against the relation named `joined`, matching the stream's
   _airbyte_<stream_name>_hashid to joined._airbyte_hashid. array_col is accepted but not
   referenced. #}
{% macro redshift__cross_join_unnest(stream_name, array_col) -%}
left join joined on _airbyte_{{ stream_name }}_hashid = joined._airbyte_hashid
{%- endmacro %}
Expand Down Expand Up @@ -87,6 +91,10 @@
_airbyte_nested_data
{%- endmacro %}

{# TiDB: always emits the fixed column name _airbyte_nested_data; the column_col
   argument is accepted but not used. #}
{% macro tidb__unnested_column_value(column_col) -%}
_airbyte_nested_data
{%- endmacro %}

{# Oracle: emits the column expression passed in as column_col, unchanged. #}
{% macro oracle__unnested_column_value(column_col) -%}
{{ column_col }}
{%- endmacro %}
Expand Down Expand Up @@ -181,3 +189,7 @@ joined as (
where numbers.generated_number <= json_length({{ column_col }})
)
{%- endmacro %}

{# TiDB: delegates to mysql__unnest_cte with the same three arguments, so TiDB renders
   exactly the CTE text that the MySQL implementation renders. #}
{% macro tidb__unnest_cte(from_table, stream_name, column_col) -%}
{{ mysql__unnest_cte(from_table, stream_name, column_col) }}
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,7 @@

concat({{ concat_chunks|join(', ') }}, '')
{%- endmacro %}

{# TiDB: emits a single concat(...) call whose arguments are the entries of `fields`
   joined with ', ' in their original order. #}
{% macro tidb__concat(fields) -%}
concat({{ fields|join(', ') }})
{%- endmacro %}
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@
String
{% endmacro %}

{# TiDB: type name rendered for the json logical type. #}
{%- macro tidb__type_json() -%}
json
{%- endmacro -%}


{# string ------------------------------------------------- #}

Expand All @@ -64,6 +68,10 @@
text
{%- endmacro -%}

{# TiDB: type name rendered for the string logical type.
   NOTE(review): char(1000) caps the length at 1000 characters; confirm that longer
   values are not expected wherever this type is used. #}
{%- macro tidb__type_string() -%}
char(1000)
{%- endmacro -%}

{# float ------------------------------------------------- #}
{% macro mysql__type_float() %}
float
Expand All @@ -77,6 +85,9 @@
Float64
{% endmacro %}

{# TiDB: type name rendered for the float logical type. #}
{% macro tidb__type_float() %}
float
{% endmacro %}

{# int ------------------------------------------------- #}
{% macro default__type_int() %}
Expand All @@ -95,6 +106,9 @@
INT
{% endmacro %}

{# TiDB: type name rendered for the int logical type.
   Presumably `signed` because the value is used as a CAST target, where MySQL-compatible
   syntax expects SIGNED rather than INT; confirm against the call sites. #}
{% macro tidb__type_int() %}
signed
{% endmacro %}

{# bigint ------------------------------------------------- #}
{% macro mysql__type_bigint() %}
Expand All @@ -109,6 +123,9 @@
BIGINT
{% endmacro %}

{# TiDB: type name rendered for the bigint logical type; the same `signed` keyword that
   tidb__type_int renders. #}
{% macro tidb__type_bigint() %}
signed
{% endmacro %}

{# numeric ------------------------------------------------- --#}
{% macro mysql__type_numeric() %}
Expand All @@ -119,6 +136,9 @@
Float64
{% endmacro %}

{# TiDB: type name rendered for the numeric logical type.
   NOTE(review): this is `float`, the same as tidb__type_float, not a fixed-precision
   decimal; confirm that the loss of exactness is acceptable. #}
{% macro tidb__type_numeric() %}
float
{% endmacro %}

{# very_large_integer --------------------------------------- --#}
{#
Expand Down Expand Up @@ -146,6 +166,10 @@ so this macro needs to be called very_large_integer.
decimal128(0)
{% endmacro %}

{# TiDB: type name rendered for very large integers: a decimal with 38 digits of
   precision and no fractional digits. #}
{% macro tidb__type_very_large_integer() %}
decimal(38, 0)
{% endmacro %}

{# timestamp ------------------------------------------------- --#}
{% macro mysql__type_timestamp() %}
time
Expand All @@ -161,6 +185,9 @@ so this macro needs to be called very_large_integer.
DateTime64
{% endmacro %}

{# TiDB: type name rendered for the timestamp logical type.
   NOTE(review): this renders `time`, matching mysql__type_timestamp in this file; a TIME
   type carries no date part, so confirm this is the intended target type. #}
{% macro tidb__type_timestamp() %}
time
{% endmacro %}

{# timestamp with time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -198,6 +225,9 @@ so this macro needs to be called very_large_integer.
DateTime64
{% endmacro %}

{# TiDB: type name rendered for timestamps with a time zone. This is the same char(1000)
   text type that tidb__type_string renders, i.e. the value is kept as text. #}
{%- macro tidb__type_timestamp_with_timezone() -%}
char(1000)
{%- endmacro -%}

{# timestamp without time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -227,6 +257,9 @@ so this macro needs to be called very_large_integer.
TIMESTAMP
{% endmacro %}

{# TiDB: type name rendered for timestamps without a time zone. #}
{% macro tidb__type_timestamp_without_timezone() %}
datetime
{% endmacro %}

{# time without time zone ------------------------------------------------- #}

Expand All @@ -250,6 +283,10 @@ so this macro needs to be called very_large_integer.
String
{% endmacro %}

{# TiDB: type name rendered for time-of-day values without a time zone. #}
{% macro tidb__type_time_without_timezone() %}
time
{% endmacro %}


{# time with time zone ------------------------------------------------- #}

Expand Down Expand Up @@ -289,6 +326,9 @@ so this macro needs to be called very_large_integer.
String
{% endmacro %}

{# TiDB: type name rendered for time-of-day values with a time zone. This is the same
   char(1000) text type that tidb__type_string renders, i.e. the value is kept as text. #}
{%- macro tidb__type_time_with_timezone() -%}
char(1000)
{%- endmacro -%}

{# date ------------------------------------------------- #}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- Postgres: json_extract_path_text(<from_json>, 'path' [, 'path' [, ...}}) -> https://www.postgresql.org/docs/12/functions-json.html
- MySQL: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://dev.mysql.com/doc/refman/8.0/en/json-search-functions.html
- ClickHouse: JSONExtractString(json_doc, 'path' [, 'path'] ...) -> https://clickhouse.com/docs/en/sql-reference/functions/json-functions/
- TiDB: JSON_EXTRACT(json_doc, 'path' [, 'path'] ...) -> https://docs.pingcap.com/tidb/stable/json-functions
#}

{# format_json_path -------------------------------------------------- #}
Expand Down Expand Up @@ -97,6 +98,11 @@
{{ "'" ~ str_list|join("','") ~ "'" }}
{%- endmacro %}

{# TiDB: renders json_path_list as a single-quoted JSON path literal in which every
   segment is double-quoted on its own, so each list entry addresses one nesting level.
   A one-element list ['x'] renders as '$."x"'. #}
{% macro tidb__format_json_path(json_path_list) -%}
{# -- ['x', 'y', 'z'] renders as '$."x"."y"."z"' #}
{# -- join on "." (quote, dot, quote) so the quotes close and reopen around every segment;
      joining on a bare dot would yield the single key '$."x.y.z"' instead. #}
{{ "'$.\"" ~ json_path_list|join('"."') ~ "\"'" }}
{%- endmacro %}

{# json_extract ------------------------------------------------- #}

{% macro json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -166,6 +172,14 @@
{% endif -%}
{%- endmacro %}

{# TiDB: emits json_extract(<column>, <path>) where <path> is format_json_path applied to
   normalized_json_path. When from_table renders to an empty string the column is left
   unqualified; otherwise it is prefixed with from_table. json_path_list is accepted but
   not referenced here. #}
{% macro tidb__json_extract(from_table, json_column, json_path_list, normalized_json_path) -%}
{%- if from_table|string() == '' %}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% else %}
json_extract({{ from_table }}.{{ json_column }}, {{ format_json_path(normalized_json_path) }})
{% endif -%}
{%- endmacro %}

{# json_extract_scalar ------------------------------------------------- #}

{% macro json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -212,6 +226,14 @@
JSONExtractRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# TiDB: emits an IF(...) that extracts the value at normalized_json_path, strips JSON
   quoting with JSON_UNQUOTE, and yields SQL NULL when the unquoted text equals 'null';
   otherwise it yields the unquoted text. The extraction expression is rendered twice.
   json_path_list is accepted but not referenced here.
   NOTE(review): the comparison is on the unquoted text, so a JSON string whose content is
   the word null would presumably also become SQL NULL; confirm this is acceptable. #}
{% macro tidb__json_extract_scalar(json_column, json_path_list, normalized_json_path) -%}
IF(
JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }})) = 'null',
NULL,
JSON_UNQUOTE(JSON_EXTRACT({{ json_column }}, {{ format_json_path(normalized_json_path) }}))
)
{%- endmacro %}

{# json_extract_array ------------------------------------------------- #}

{% macro json_extract_array(json_column, json_path_list, normalized_json_path) -%}
Expand Down Expand Up @@ -258,6 +280,10 @@
JSONExtractArrayRaw(assumeNotNull({{ json_column }}), {{ format_json_path(json_path_list) }})
{%- endmacro %}

{# TiDB: emits json_extract(<json_column>, <path>) with the path built by format_json_path
   from normalized_json_path. The column is never table-qualified, and json_path_list is
   accepted but not referenced here. #}
{% macro tidb__json_extract_array(json_column, json_path_list, normalized_json_path) -%}
json_extract({{ json_column }}, {{ format_json_path(normalized_json_path) }})
{%- endmacro %}

{# json_extract_string_array ------------------------------------------------- #}

{% macro json_extract_string_array(json_column, json_path_list, normalized_json_path) -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,11 @@
IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{# TiDB does not support casting a string to boolean, so the value is lower-cased and
   compared with 'true': the result is true on a match and false otherwise.
   NOTE(review): for a NULL field the comparison is not true, so IF presumably yields
   false rather than NULL; confirm that collapsing NULL into false is intended. #}
{% macro tidb__cast_to_boolean(field) -%}
IF(lower({{ field }}) = 'true', true, false)
{%- endmacro %}

{# -- Redshift does not support converting string directly to boolean, it must go through int first #}
{% macro redshift__cast_to_boolean(field) -%}
{% if redshift_super_type() -%}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,10 @@ services:
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
normalization-tidb:
image: airbyte/normalization-tidb:${VERSION}
build:
dockerfile: tidb.Dockerfile
context: .
labels:
io.airbyte.git-revision: ${GIT_REVISION}
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,5 @@ services:
image: airbyte/normalization-snowflake:${VERSION}
normalization-redshift:
image: airbyte/normalization-redshift:${VERSION}
normalization-tidb:
image: airbyte/normalization-tidb:${VERSION}
Loading