Skip to content

Commit 241cf4a

Browse files
catpineapplemarcosmarxmoctavia-squidington-iii
authored
🎉 New Destination: Apache Doris (#17884)
* first commit * normalized code and integration test * add bootstrap.md * add doris to destination def * auto-bump connector version * format files Co-authored-by: marcosmarxm <[email protected]> Co-authored-by: Octavia Squidington III <[email protected]>
1 parent b52caa0 commit 241cf4a

File tree

24 files changed

+1647
-0
lines changed

24 files changed

+1647
-0
lines changed

airbyte-config/init/src/main/resources/seed/destination_definitions.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,12 @@
1818
documentationUrl: https://docs.airbyte.com/integrations/destinations/amazon-sqs
1919
icon: amazonsqs.svg
2020
releaseStage: alpha
21+
- name: Apache Doris
22+
destinationDefinitionId: 05c161bf-ca73-4d48-b524-d392be417002
23+
dockerRepository: airbyte/destination-doris
24+
dockerImageTag: 0.1.0
25+
documentationUrl: https://docs.airbyte.com/integrations/destinations/doris
26+
releaseStage: alpha
2127
- name: AWS Datalake
2228
destinationDefinitionId: 99878c90-0fbd-46d3-9d98-ffde879d17fc
2329
dockerRepository: airbyte/destination-aws-datalake

airbyte-config/init/src/main/resources/seed/destination_specs.yaml

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,67 @@
188188
supportsDBT: false
189189
supported_destination_sync_modes:
190190
- "append"
191+
- dockerImage: "airbyte/destination-doris:0.1.0"
  # Connector specification for the Apache Doris destination.
  spec:
    documentationUrl: "https://docs.airbyte.io/integrations/destinations/doris"
    connectionSpecification:
      $schema: "http://json-schema.org/draft-07/schema#"
      title: "Doris Destination Spec"
      type: "object"
      # "password" is intentionally not required.
      required:
      - "host"
      - "httpport"
      - "queryport"
      - "username"
      - "database"
      properties:
        host:
          title: "Host"
          description: "Hostname of the database"
          type: "string"
          order: 0
        httpport:
          title: "HttpPort"
          description: "Http Port of the database."
          type: "integer"
          minimum: 0
          # 65535 is the highest valid TCP port number (was 65536).
          maximum: 65535
          default: 8030
          examples:
          - "8030"
          order: 1
        queryport:
          title: "QueryPort"
          description: "Query(SQL) Port of the database."
          type: "integer"
          minimum: 0
          # 65535 is the highest valid TCP port number (was 65536).
          maximum: 65535
          default: 9030
          examples:
          - "9030"
          order: 2
        database:
          title: "DataBase Name"
          description: "Name of the database."
          type: "string"
          order: 3
        username:
          title: "UserName"
          description: "Username to use to access the database."
          type: "string"
          order: 4
        password:
          title: "Password"
          description: "Password associated with the username."
          type: "string"
          airbyte_secret: true
          order: 5
    supportsIncremental: false
    supportsNormalization: false
    supportsDBT: false
    supported_destination_sync_modes:
    - "append"
    - "overwrite"
191252
- dockerImage: "airbyte/destination-aws-datalake:0.1.1"
192253
spec:
193254
documentationUrl: "https://docs.airbyte.com/integrations/destinations/aws-datalake"
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
*
2+
!Dockerfile
3+
!build
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Stage 1 ("build"): unpack the connector distribution tarball into /airbyte.
FROM airbyte/integration-base-java:dev AS build
2+
3+
WORKDIR /airbyte
4+
# Connector name; used below to locate and name the distribution tarball.
ENV APPLICATION destination-doris
5+
6+
# Copy the distribution tarball from the build output into the image.
COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar
7+
8+
# Extract without the archive's top-level directory, then delete the tarball.
RUN tar xf ${APPLICATION}.tar --strip-components=1 && rm -rf ${APPLICATION}.tar
9+
10+
# Stage 2: final image; receives only the unpacked files from the build stage.
FROM airbyte/integration-base-java:dev
11+
12+
WORKDIR /airbyte
13+
# NOTE(review): presumably read by the base image's entrypoint to find the launcher - confirm.
ENV APPLICATION destination-doris
14+
15+
COPY --from=build /airbyte /airbyte
16+
17+
# Image metadata: connector version and image name.
LABEL io.airbyte.version=0.1.0
18+
LABEL io.airbyte.name=airbyte/destination-doris
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# Destination Doris
2+
3+
This is the repository for the Doris destination connector in Java.
4+
For information about how to use this connector within Airbyte, see [the User Documentation](https://docs.airbyte.io/integrations/destinations/doris).
5+
6+
## Local development
7+
8+
#### Building via Gradle
9+
From the Airbyte repository root, run:
10+
```
11+
./gradlew :airbyte-integrations:connectors:destination-doris:build
12+
```
13+
14+
#### Create credentials
15+
**If you are a community contributor**, generate the necessary credentials and place them in `secrets/config.json` conforming to the spec file in `src/main/resources/spec.json`.
16+
Note that the `secrets` directory is git-ignored by default, so there is no danger of accidentally checking in sensitive information.
17+
18+
**If you are an Airbyte core member**, follow the [instructions](https://docs.airbyte.io/connector-development#using-credentials-in-ci) to set up the credentials.
19+
20+
### Locally running the connector docker image
21+
22+
#### Build
23+
Build the connector image via Gradle:
24+
```
25+
./gradlew :airbyte-integrations:connectors:destination-doris:airbyteDocker
26+
```
27+
When building via Gradle, the docker image name and tag, respectively, are the values of the `io.airbyte.name` and `io.airbyte.version` `LABEL`s in
28+
the Dockerfile.
29+
30+
#### Run
31+
Then run any of the connector commands as follows:
32+
```
33+
docker run --rm airbyte/destination-doris:dev spec
34+
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev check --config /secrets/config.json
35+
docker run --rm -v $(pwd)/secrets:/secrets airbyte/destination-doris:dev discover --config /secrets/config.json
36+
docker run --rm -v $(pwd)/secrets:/secrets -v $(pwd)/integration_tests:/integration_tests airbyte/destination-doris:dev write --config /secrets/config.json --catalog /integration_tests/configured_catalog.json
37+
```
38+
39+
## Testing
40+
We use `JUnit` for Java tests.
41+
42+
### Unit and Integration Tests
43+
Place unit tests under `src/test/java/io/airbyte/integrations/destination/doris`.
44+
45+
#### Acceptance Tests
46+
Airbyte has a standard test suite that all destination connectors must pass. Implement the `TODO`s in
47+
`src/test-integration/java/io/airbyte/integrations/destination/doris/DorisDestinationAcceptanceTest.java`.
48+
49+
### Using gradle to run tests
50+
All commands should be run from airbyte project root.
51+
To run unit tests:
52+
```
53+
./gradlew :airbyte-integrations:connectors:destination-doris:unitTest
54+
```
55+
To run acceptance and custom integration tests:
56+
```
57+
./gradlew :airbyte-integrations:connectors:destination-doris:integrationTest
58+
```
59+
60+
## Dependency Management
61+
62+
### Publishing a new version of the connector
63+
You've checked out the repo, implemented a million dollar feature, and you're ready to share your changes with the world. Now what?
64+
1. Make sure your changes are passing unit and integration tests.
65+
1. Bump the connector version in `Dockerfile` -- just increment the value of the `LABEL io.airbyte.version` appropriately (we use [SemVer](https://semver.org/)).
66+
1. Create a Pull Request.
67+
1. Pat yourself on the back for being an awesome contributor.
68+
1. Someone from Airbyte will take a look at your PR and iterate with you to merge it into master.
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Doris destination
2+
3+
4+
The Doris destination uses the MySQL protocol (via JDBC) and Doris Stream Load to exchange data.
5+
6+
1. JDBC is used to manipulate the data table structure and execute the create table statement before data import
7+
2. Stream Load is a synchronous import method based on HTTP/HTTPS. The Doris destination first writes the records to a CSV file and then loads that file into Doris with a Stream Load transaction.
8+
9+
## Introduction to Apache Doris
10+
11+
Apache Doris is a high-performance, real-time analytical database based on MPP architecture, known for its extreme speed and ease of use. It only requires a sub-second response time to return query results under massive data and can support not only high-concurrent point query scenarios but also high-throughput complex analysis scenarios. Based on this, Apache Doris can better meet the scenarios of report analysis, ad-hoc query, unified data warehouse, Data Lake Query Acceleration, etc. Users can build user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, order analysis, and other applications on top of this.
12+
[https://doris.apache.org/docs/summary/basic-summary](https://doris.apache.org/docs/summary/basic-summary)
13+
14+
15+
## Technical Overview
16+
The overall architecture of Apache Doris is very simple, with only two types of processes.
17+
18+
#### Frontend(FE):
19+
##### It is mainly responsible for user request access, query parsing and planning, management of metadata, and node management-related work.
20+
#### Backend(BE):
21+
##### It is mainly responsible for data storage and query plan execution.
22+
23+
Both types of processes are horizontally scalable, and a single cluster can support up to hundreds of machines and tens of petabytes of storage capacity. And these two types of processes guarantee high availability of services and high reliability of data through consistency protocols. This highly integrated architecture design greatly reduces the operation and maintenance cost of a distributed system.
24+
25+
Apache Doris adopts the MySQL protocol, is highly compatible with the MySQL dialect, and supports standard SQL. Users can access Doris through various client tools, and Doris connects seamlessly with BI tools.
26+
27+
[Stream load](https://doris.apache.org/docs/data-operate/import/import-way/stream-load-manual/) is a synchronous way of importing. Users import local files or data streams into Doris by sending HTTP protocol requests. Stream load synchronously executes the import and returns the import result. Users can directly determine whether the import is successful by the return body of the request. Stream load is mainly suitable for importing local files or data from data streams through procedures.
28+
29+
Each import job of Doris, whether it is batch import using Stream Load or single import using INSERT statement, is a complete transaction operation. The import transaction can ensure that the data in a batch takes effect atomically, and there will be no partial data writing.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// Build script for the Doris destination connector.
plugins {
2+
id 'application'
3+
id 'airbyte-docker'
4+
id 'airbyte-integration-test-java'
5+
}
6+
7+
application {
8+
// Entry point of the connector.
mainClass = 'io.airbyte.integrations.destination.doris.DorisDestination'
9+
}
10+
11+
dependencies {
12+
// CSV library (records are staged as CSV before Stream Load).
implementation 'org.apache.commons:commons-csv:1.4'
13+
// MySQL JDBC driver (Doris is accessed over the MySQL protocol).
implementation group: 'mysql', name: 'mysql-connector-java', version: '8.0.16'
14+
implementation project(':airbyte-config:config-models')
15+
implementation project(':airbyte-protocol:protocol-models')
16+
implementation project(':airbyte-integrations:bases:base-java')
17+
implementation files(project(':airbyte-integrations:bases:base-java').airbyteDocker.outputs)
18+
19+
// Integration-test-only dependencies.
integrationTestJavaImplementation project(':airbyte-integrations:bases:standard-destination-test')
20+
integrationTestJavaImplementation project(':airbyte-integrations:connectors:destination-doris')
21+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* Copyright (c) 2022 Airbyte, Inc., all rights reserved.
3+
*/
4+
5+
package io.airbyte.integrations.destination.doris;
6+
7+
import com.fasterxml.jackson.databind.JsonNode;
8+
9+
public class DorisConnectionOptions {
10+
11+
private String db;
12+
private static String DB_KEY = "database";
13+
private String table;
14+
private static final String TABLE_KEY = "table";
15+
16+
private String user;
17+
private static final String USER_KEY = "username";
18+
19+
private String pwd;
20+
private static final String PWD_KEY = "password";
21+
22+
private String feHost;
23+
private static final String FE_HOST_KEY = "host";
24+
25+
private Integer feHttpPort;
26+
private static final String FE_HTTP_PORT_KEY = "httpport";
27+
28+
private Integer feQueryPort;
29+
private static final String FE_QUERY_PORT_KEY = "queryport";
30+
31+
public static DorisConnectionOptions getDorisConnection(final JsonNode config, String table) {
32+
return new DorisConnectionOptions(
33+
config.get(DB_KEY).asText(),
34+
table,
35+
config.get(USER_KEY).asText(),
36+
config.get(PWD_KEY) == null ? "" : config.get(PWD_KEY).asText(),
37+
config.get(FE_HOST_KEY).asText(),
38+
config.get(FE_HTTP_PORT_KEY).asInt(8030),
39+
config.get(FE_QUERY_PORT_KEY).asInt(9030));
40+
41+
}
42+
43+
public DorisConnectionOptions(String db, String table, String user, String pwd, String feHost, Integer feHttpPort, Integer feQueryPort) {
44+
this.db = db;
45+
this.table = table;
46+
this.user = user;
47+
this.pwd = pwd;
48+
this.feHost = feHost;
49+
this.feHttpPort = feHttpPort;
50+
this.feQueryPort = feQueryPort;
51+
}
52+
53+
public String getDb() {
54+
return db;
55+
}
56+
57+
public String getTable() {
58+
return table;
59+
}
60+
61+
public String getUser() {
62+
return user;
63+
}
64+
65+
public String getPwd() {
66+
return pwd;
67+
}
68+
69+
public String getFeHost() {
70+
return feHost;
71+
}
72+
73+
public Integer getFeHttpPort() {
74+
return feHttpPort;
75+
}
76+
77+
public String getHttpHostPort() {
78+
return feHost + ":" + feHttpPort;
79+
}
80+
81+
public String getQueryHostPort() {
82+
return feHost + ":" + feHttpPort;
83+
}
84+
85+
public Integer getFeQueryPort() {
86+
return feQueryPort;
87+
}
88+
89+
@Override
90+
public String toString() {
91+
return "DorisConnectionOptions{" +
92+
"db='" + db + '\'' +
93+
", table='" + table + '\'' +
94+
", user='" + user + '\'' +
95+
", pwd='" + pwd + '\'' +
96+
", feHost='" + feHost + '\'' +
97+
", feHttpPort=" + feHttpPort +
98+
", feQueryPort=" + feQueryPort +
99+
'}';
100+
}
101+
102+
}

0 commit comments

Comments
 (0)