Skip to content

🎉 New Destination: Apache Iceberg #18836

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 24 commits into from
Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
379be3b
wip: developing Iceberg(s3 & hive catalog) Destination
Leibnizhu Oct 26, 2022
ed48721
wip: developing Iceberg(s3 & hive catalog) Destination 2
Leibnizhu Oct 26, 2022
e1f2b34
wip: developing Iceberg(s3 & hive catalog) Destination 3
Leibnizhu Oct 27, 2022
439ca06
wip: developing Iceberg(s3 & hive catalog) Destination 3
Leibnizhu Oct 28, 2022
2576799
wip: developing Iceberg(s3 & hive catalog) Destination 2
Leibnizhu Oct 28, 2022
70e5aa5
refactor: config
Leibnizhu Oct 31, 2022
976fdda
feat: add hadoop and jdbc catalog implements
Leibnizhu Nov 1, 2022
fa2f84d
docs: add docs and config examples
Leibnizhu Nov 1, 2022
b946fb1
style
Leibnizhu Nov 1, 2022
7f3256b
feat: S3Config
Leibnizhu Nov 1, 2022
3fbc7b8
fix: acceptance test, and unit test
Leibnizhu Nov 2, 2022
eee7194
chore: remove sensitive logs
Leibnizhu Nov 2, 2022
7b34ec3
docs: builds.md
Leibnizhu Nov 2, 2022
6c2618d
refactor: 1.add flush batch size and auto compact configs 2.refactor …
Leibnizhu Nov 2, 2022
9c92e75
test: add integration test
Leibnizhu Nov 3, 2022
1965e26
test: Add HadoopCatalog integration tests
Leibnizhu Nov 3, 2022
35afab0
docs: add bootstrap.md
Leibnizhu Nov 3, 2022
a26afbd
test: Add HiveCatalog integration tests
Leibnizhu Nov 4, 2022
d77d3f9
perf: purge drop temp Iceberg table
Leibnizhu Nov 4, 2022
d8de9ac
chore: delete unnecessary log
Leibnizhu Nov 5, 2022
6ab6bfc
remove iceberg accpt test file
marcosmarxm Nov 18, 2022
fa17207
run format
marcosmarxm Nov 18, 2022
8020724
readd iceberg
marcosmarxm Nov 18, 2022
dde1a8a
regenrate spec
marcosmarxm Nov 18, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@
dockerImageTag: 0.1.0
documentationUrl: https://docs.airbyte.com/integrations/destinations/doris
releaseStage: alpha
# Apache Iceberg destination connector registration (image tag must match destination_specs.yaml).
- name: Apache Iceberg
  destinationDefinitionId: df65a8f3-9908-451b-aa9b-445462803560
  dockerRepository: airbyte/destination-iceberg
  dockerImageTag: 0.1.0
  documentationUrl: https://docs.airbyte.com/integrations/destinations/iceberg
  releaseStage: alpha
- name: AWS Datalake
destinationDefinitionId: 99878c90-0fbd-46d3-9d98-ffde879d17fc
dockerRepository: airbyte/destination-aws-datalake
Expand Down
275 changes: 275 additions & 0 deletions airbyte-config/init/src/main/resources/seed/destination_specs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,281 @@
supported_destination_sync_modes:
- "append"
- "overwrite"
# Connector spec for airbyte/destination-iceberg:0.1.0 (connectionSpecification is JSON Schema draft-07).
- dockerImage: "airbyte/destination-iceberg:0.1.0"
  spec:
    documentationUrl: "https://docs.airbyte.com/integrations/destinations/iceberg"
    connectionSpecification:
      $schema: "http://json-schema.org/draft-07/schema#"
      title: "Iceberg Destination Spec"
      type: "object"
      required:
        - "catalog_config"
        - "storage_config"
        - "format_config"
      properties:
        # One of three Iceberg catalog implementations: Hive, Hadoop, or JDBC.
        catalog_config:
          title: "Iceberg catalog config"
          type: "object"
          description: "Catalog config of Iceberg."
          oneOf:
            - title: "HiveCatalog: Use Apache Hive MetaStore"
              required:
                - "catalog_type"
                - "hive_thrift_uri"
              properties:
                catalog_type:
                  title: "Catalog Type"
                  type: "string"
                  default: "Hive"
                  enum:
                    - "Hive"
                  order: 0
                hive_thrift_uri:
                  title: "Hive Metastore thrift uri"
                  type: "string"
                  description: "Hive MetaStore thrift server uri of iceberg catalog."
                  examples:
                    - "host:port"
                  order: 1
                database:
                  title: "Default database"
                  description: "The default database tables are written to if the source does not specify a namespace. The usual value for this field is \"default\"."
                  type: "string"
                  default: "default"
                  examples:
                    - "default"
                  order: 2
            - title: "HadoopCatalog: Use hierarchical file systems as same as storage config"
              description: "A Hadoop catalog doesn’t need to connect to a Hive MetaStore, but can only be used with HDFS or similar file systems that support atomic rename."
              required:
                - "catalog_type"
              properties:
                catalog_type:
                  title: "Catalog Type"
                  type: "string"
                  default: "Hadoop"
                  enum:
                    - "Hadoop"
                  order: 0
                database:
                  title: "Default database"
                  description: "The default database tables are written to if the source does not specify a namespace. The usual value for this field is \"default\"."
                  type: "string"
                  default: "default"
                  examples:
                    - "default"
                  order: 1
            - title: "JdbcCatalog: Use relational database"
              description: "Using a table in a relational database to manage Iceberg tables through JDBC. Read more <a href=\"https://iceberg.apache.org/docs/latest/jdbc/\">here</a>. Supporting: PostgreSQL"
              required:
                - "catalog_type"
              properties:
                catalog_type:
                  title: "Catalog Type"
                  type: "string"
                  default: "Jdbc"
                  enum:
                    - "Jdbc"
                  order: 0
                database:
                  title: "Default schema"
                  description: "The default schema tables are written to if the source does not specify a namespace. The usual value for this field is \"public\"."
                  type: "string"
                  default: "public"
                  examples:
                    - "public"
                  order: 1
                jdbc_url:
                  title: "Jdbc url"
                  type: "string"
                  examples:
                    - "jdbc:postgresql://{host}:{port}/{database}"
                  order: 2
                username:
                  title: "User"
                  description: "Username to use to access the database."
                  type: "string"
                  order: 3
                password:
                  title: "Password"
                  description: "Password associated with the username."
                  type: "string"
                  airbyte_secret: true
                  order: 4
                ssl:
                  title: "SSL Connection"
                  description: "Encrypt data using SSL. When activating SSL, please select one of the connection modes."
                  type: "boolean"
                  default: false
                  order: 5
                catalog_schema:
                  title: "schema for Iceberg catalog"
                  description: "Iceberg catalog metadata tables are written to catalog schema. The usual value for this field is \"public\"."
                  type: "string"
                  default: "public"
                  examples:
                    - "public"
                  order: 6
          order: 0
        # Object storage backing the Iceberg warehouse (S3 only in this version).
        storage_config:
          title: "Storage config"
          type: "object"
          description: "Storage config of Iceberg."
          oneOf:
            - title: "S3"
              type: "object"
              description: "S3 object storage"
              required:
                - "storage_type"
                - "access_key_id"
                - "secret_access_key"
                - "s3_warehouse_uri"
              properties:
                storage_type:
                  title: "Storage Type"
                  type: "string"
                  default: "S3"
                  enum:
                    - "S3"
                  order: 0
                access_key_id:
                  type: "string"
                  description: "The access key ID to access the S3 bucket. Airbyte requires Read and Write permissions to the given bucket. Read more <a href=\"https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys\">here</a>."
                  title: "S3 Key ID"
                  airbyte_secret: true
                  examples:
                    - "A012345678910EXAMPLE"
                  order: 0
                secret_access_key:
                  type: "string"
                  description: "The corresponding secret to the access key ID. Read more <a href=\"https://docs.aws.amazon.com/general/latest/gr/aws-sec-cred-types.html#access-keys-and-secret-access-keys\">here</a>"
                  title: "S3 Access Key"
                  airbyte_secret: true
                  examples:
                    - "a012345678910ABCDEFGH/AbCdEfGhEXAMPLEKEY"
                  order: 1
                s3_warehouse_uri:
                  title: "S3 Warehouse Uri for Iceberg"
                  type: "string"
                  description: "The Warehouse Uri for Iceberg"
                  examples:
                    - "s3a://my-bucket/path/to/warehouse"
                    - "s3://my-bucket/path/to/warehouse"
                  order: 2
                s3_bucket_region:
                  title: "S3 Bucket Region"
                  type: "string"
                  default: ""
                  description: "The region of the S3 bucket. See <a href=\"https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/using-regions-availability-zones.html#concepts-available-regions\">here</a> for all region codes."
                  enum:
                    - ""
                    - "us-east-1"
                    - "us-east-2"
                    - "us-west-1"
                    - "us-west-2"
                    - "af-south-1"
                    - "ap-east-1"
                    - "ap-south-1"
                    - "ap-northeast-1"
                    - "ap-northeast-2"
                    - "ap-northeast-3"
                    - "ap-southeast-1"
                    - "ap-southeast-2"
                    - "ca-central-1"
                    - "cn-north-1"
                    - "cn-northwest-1"
                    - "eu-central-1"
                    - "eu-north-1"
                    - "eu-south-1"
                    - "eu-west-1"
                    - "eu-west-2"
                    - "eu-west-3"
                    - "sa-east-1"
                    - "me-south-1"
                    - "us-gov-east-1"
                    - "us-gov-west-1"
                  order: 3
                s3_endpoint:
                  title: "Endpoint"
                  type: "string"
                  default: ""
                  description: "Your S3 endpoint url. Read more <a href=\"https://docs.aws.amazon.com/general/latest/gr/s3.html#:~:text=Service%20endpoints-,Amazon%20S3%20endpoints,-When%20you%20use\">here</a>"
                  examples:
                    - "http://localhost:9000"
                    - "localhost:9000"
                  order: 4
                s3_path_style_access:
                  type: "boolean"
                  description: "Use path style access"
                  examples:
                    - true
                    - false
                  default: true
                  order: 5
          order: 1
        # Data-file format and write/compaction tuning.
        format_config:
          title: "File format"
          type: "object"
          required:
            - "format"
          description: "File format of Iceberg storage."
          properties:
            format:
              title: "File storage format"
              type: "string"
              default: "Parquet"
              description: "Storage format of Iceberg data files: Parquet (default) or Avro."
              enum:
                - "Parquet"
                - "Avro"
              order: 0
            flush_batch_size:
              title: "Data file flushing batch size"
              description: "Iceberg data file flush batch size. Incoming rows write to cache firstly; When cache size reaches this 'batch size', flush into real Iceberg data file."
              type: "integer"
              default: 10000
              order: 1
            auto_compact:
              title: "Auto compact data files"
              description: "Auto compact data files when stream close"
              type: "boolean"
              default: false
              order: 2
            compact_target_file_size_in_mb:
              title: "Target size of compacted data file"
              description: "Specify the target size of Iceberg data file when performing a compaction action."
              type: "integer"
              default: 100
              order: 3
          order: 2
    supportsNormalization: false
    supportsDBT: false
    supported_destination_sync_modes:
      - "overwrite"
      - "append"
- dockerImage: "airbyte/destination-aws-datalake:0.1.1"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/destinations/aws-datalake"
Expand Down
3 changes: 2 additions & 1 deletion airbyte-integrations/builds.md
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,8 @@
| Google Cloud Storage (GCS) | [![destination-gcs](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-gcs%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-gcs) |
| Google Firestore | [![destination-firestore](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-firestore%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-firestore) |
| Google PubSub | [![destination-pubsub](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-pubsub%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-pubsub) |
| Google Sheets | [![destination-sheets](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-sheets%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-sheets) |
| Google Sheets | [![destination-sheets](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-sheets%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-sheets) |
| Apache Iceberg | [![destination-iceberg](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-iceberg%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-iceberg) |
| Kafka | [![destination-kafka](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-kafka%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-kafka) |
| Keen (Chargify) | [![destination-keen](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-keen%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-keen) |
| Local CSV | [![destination-csv](https://img.shields.io/endpoint?url=https%3A%2F%2Fdnsgjos7lj2fu.cloudfront.net%2Ftests%2Fsummary%2Fdestination-csv%2Fbadge.json)](https://dnsgjos7lj2fu.cloudfront.net/tests/summary/destination-csv) |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Exclude everything from the Docker build context except the Dockerfile
# and the Gradle build output copied by the Dockerfile's COPY instruction.
*
!Dockerfile
!build
25 changes: 25 additions & 0 deletions airbyte-integrations/connectors/destination-iceberg/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Stage 1: unpack the connector distribution tarball produced by the Gradle build.
FROM airbyte/integration-base-java:dev AS build

WORKDIR /airbyte
# Use the modern ENV key=value form (the legacy space-separated form is discouraged).
ENV APPLICATION=destination-iceberg

COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

# Unpack and delete the archive (a regular file, so plain -f suffices) to keep the layer small.
RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -f ${APPLICATION}.tar

# Stage 2: runtime image containing only the unpacked application.
FROM airbyte/integration-base-java:dev

WORKDIR /airbyte
ENV APPLICATION=destination-iceberg

# JPMS --add-opens flags granting reflective access to JDK internals at runtime
# (NOTE(review): presumably required by the bundled Iceberg/Hadoop libraries — confirm).
ENV JAVA_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED \
--add-opens java.base/java.util=ALL-UNNAMED \
--add-opens java.base/java.lang.reflect=ALL-UNNAMED \
--add-opens java.base/java.text=ALL-UNNAMED \
--add-opens java.base/sun.nio.ch=ALL-UNNAMED \
--add-opens java.base/java.nio=ALL-UNNAMED"

COPY --from=build /airbyte /airbyte

LABEL io.airbyte.version=0.1.0
LABEL io.airbyte.name=airbyte/destination-iceberg
Loading