Skip to content

Commit 4863ea1

Browse files
authored
introduce common abstraction for CDC via debezium (#4580)
* wip * add file * final structure * few more updates * undo unwanted changes * add abstract test + more refinement * remove CDC metadata to debezium * rename class + add missing property * move debezium to bases + upgrade debezium version + review comments * downgrade version + minor fixes * reset to minutes * fix build * address review comments * should return Optional * use common abstraction for CDC via debezium for mysql (#4604) * use new cdc abstraction for mysql * undo wanted change * pull in latest changes * use renamed class + move constants to MySqlSource * bring in latest changes from cdc abstraction * format * bring in latest changes * pull in latest changes * use common abstraction for CDC via debezium for postgres (#4607) * use cdc abstraction for postgres * add files * ready * use renamed class + move constants to PostgresSource * bring in the latest changes * bring in latest changes * pull in latest changes
1 parent 078de48 commit 4863ea1

File tree

47 files changed

+1964
-2015
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

47 files changed

+1964
-2015
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
// Applies the plugin that adds a `testFixtures` source set to this module.
plugins {
    id 'java-test-fixtures'
}

// Test fixtures inherit every dependency declared on the main `implementation` configuration.
configurations {
    testFixturesImplementation.extendsFrom implementation
}

// Single place to bump the versions shared by several artifacts below.
def debeziumVersion = '1.4.2.Final'
def junitVersion = '5.4.2'

dependencies {
    implementation project(':airbyte-protocol:models')

    // Debezium engine plus the connectors used by the CDC abstraction.
    implementation "io.debezium:debezium-api:${debeziumVersion}"
    implementation "io.debezium:debezium-embedded:${debeziumVersion}"
    implementation "io.debezium:debezium-connector-mysql:${debeziumVersion}"
    implementation "io.debezium:debezium-connector-postgres:${debeziumVersion}"

    // Project modules needed only by the shared test fixtures.
    testFixturesImplementation project(':airbyte-db')
    testFixturesImplementation project(':airbyte-integrations:bases:base-java')

    // JUnit 5 for the test fixtures.
    testFixturesImplementation "org.junit.jupiter:junit-jupiter-engine:${junitVersion}"
    testFixturesImplementation "org.junit.jupiter:junit-jupiter-api:${junitVersion}"
    testFixturesImplementation "org.junit.jupiter:junit-jupiter-params:${junitVersion}"
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,143 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2020 Airbyte
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package io.airbyte.integrations.debezium;
26+
27+
import com.fasterxml.jackson.databind.JsonNode;
28+
import io.airbyte.commons.util.AutoCloseableIterator;
29+
import io.airbyte.commons.util.AutoCloseableIterators;
30+
import io.airbyte.commons.util.CompositeIterator;
31+
import io.airbyte.commons.util.MoreIterators;
32+
import io.airbyte.integrations.debezium.internals.AirbyteFileOffsetBackingStore;
33+
import io.airbyte.integrations.debezium.internals.AirbyteSchemaHistoryStorage;
34+
import io.airbyte.integrations.debezium.internals.DebeziumEventUtils;
35+
import io.airbyte.integrations.debezium.internals.DebeziumRecordIterator;
36+
import io.airbyte.integrations.debezium.internals.DebeziumRecordPublisher;
37+
import io.airbyte.integrations.debezium.internals.FilteredFileDatabaseHistory;
38+
import io.airbyte.protocol.models.AirbyteMessage;
39+
import io.airbyte.protocol.models.ConfiguredAirbyteCatalog;
40+
import io.debezium.engine.ChangeEvent;
41+
import java.time.Instant;
42+
import java.util.Collections;
43+
import java.util.Iterator;
44+
import java.util.List;
45+
import java.util.Map;
46+
import java.util.Optional;
47+
import java.util.Properties;
48+
import java.util.concurrent.LinkedBlockingQueue;
49+
import java.util.function.Supplier;
50+
import org.slf4j.Logger;
51+
import org.slf4j.LoggerFactory;
52+
53+
/**
54+
* This class acts as the bridge between Airbyte DB connectors and debezium. If a DB connector wants
55+
* to use debezium for CDC, it should use this class
56+
*/
57+
public class AirbyteDebeziumHandler {
58+
59+
private static final Logger LOGGER = LoggerFactory.getLogger(AirbyteDebeziumHandler.class);
60+
/**
61+
* We use 10000 as capacity cause the default queue size and batch size of debezium is :
62+
* {@link io.debezium.config.CommonConnectorConfig#DEFAULT_MAX_BATCH_SIZE}is 2048
63+
* {@link io.debezium.config.CommonConnectorConfig#DEFAULT_MAX_QUEUE_SIZE} is 8192
64+
*/
65+
private static final int QUEUE_CAPACITY = 10000;
66+
67+
private final Properties connectorProperties;
68+
private final JsonNode config;
69+
private final CdcTargetPosition targetPosition;
70+
private final ConfiguredAirbyteCatalog catalog;
71+
private final boolean trackSchemaHistory;
72+
73+
private final LinkedBlockingQueue<ChangeEvent<String, String>> queue;
74+
75+
public AirbyteDebeziumHandler(JsonNode config,
76+
CdcTargetPosition targetPosition,
77+
Properties connectorProperties,
78+
ConfiguredAirbyteCatalog catalog,
79+
boolean trackSchemaHistory) {
80+
this.config = config;
81+
this.targetPosition = targetPosition;
82+
this.connectorProperties = connectorProperties;
83+
this.catalog = catalog;
84+
this.trackSchemaHistory = trackSchemaHistory;
85+
this.queue = new LinkedBlockingQueue<>(QUEUE_CAPACITY);
86+
}
87+
88+
public List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(CdcSavedInfoFetcher cdcSavedInfoFetcher,
89+
CdcStateHandler cdcStateHandler,
90+
CdcMetadataInjector cdcMetadataInjector,
91+
Instant emittedAt) {
92+
LOGGER.info("using CDC: {}", true);
93+
final AirbyteFileOffsetBackingStore offsetManager = AirbyteFileOffsetBackingStore.initializeState(cdcSavedInfoFetcher.getSavedOffset());
94+
final Optional<AirbyteSchemaHistoryStorage> schemaHistoryManager = schemaHistoryManager(cdcSavedInfoFetcher);
95+
final DebeziumRecordPublisher publisher = new DebeziumRecordPublisher(connectorProperties, config, catalog, offsetManager,
96+
schemaHistoryManager);
97+
publisher.start(queue);
98+
99+
// handle state machine around pub/sub logic.
100+
final AutoCloseableIterator<ChangeEvent<String, String>> eventIterator = new DebeziumRecordIterator(
101+
queue,
102+
targetPosition,
103+
publisher::hasClosed,
104+
publisher::close);
105+
106+
// convert to airbyte message.
107+
final AutoCloseableIterator<AirbyteMessage> messageIterator = AutoCloseableIterators
108+
.transform(
109+
eventIterator,
110+
(event) -> DebeziumEventUtils.toAirbyteMessage(event, cdcMetadataInjector, emittedAt));
111+
112+
// our goal is to get the state at the time this supplier is called (i.e. after all message records
113+
// have been produced)
114+
final Supplier<AirbyteMessage> stateMessageSupplier = () -> {
115+
Map<String, String> offset = offsetManager.read();
116+
String dbHistory = trackSchemaHistory ? schemaHistoryManager
117+
.orElseThrow(() -> new RuntimeException("Schema History Tracking is true but manager is not initialised")).read() : null;
118+
119+
return cdcStateHandler.saveState(offset, dbHistory);
120+
};
121+
122+
// wrap the supplier in an iterator so that we can concat it to the message iterator.
123+
final Iterator<AirbyteMessage> stateMessageIterator = MoreIterators.singletonIteratorFromSupplier(stateMessageSupplier);
124+
125+
// this structure guarantees that the debezium engine will be closed, before we attempt to emit the
126+
// state file. we want this so that we have a guarantee that the debezium offset file (which we use
127+
// to produce the state file) is up-to-date.
128+
final CompositeIterator<AirbyteMessage> messageIteratorWithStateDecorator =
129+
AutoCloseableIterators.concatWithEagerClose(messageIterator, AutoCloseableIterators.fromIterator(stateMessageIterator));
130+
131+
return Collections.singletonList(messageIteratorWithStateDecorator);
132+
}
133+
134+
private Optional<AirbyteSchemaHistoryStorage> schemaHistoryManager(CdcSavedInfoFetcher cdcSavedInfoFetcher) {
135+
if (trackSchemaHistory) {
136+
FilteredFileDatabaseHistory.setDatabaseName(config.get("database").asText());
137+
return Optional.of(AirbyteSchemaHistoryStorage.initializeDBHistory(cdcSavedInfoFetcher.getSavedSchemaHistory()));
138+
}
139+
140+
return Optional.empty();
141+
}
142+
143+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2020 Airbyte
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package io.airbyte.integrations.debezium;
26+
27+
import com.fasterxml.jackson.databind.JsonNode;
28+
import com.fasterxml.jackson.databind.node.ObjectNode;
29+
30+
/**
31+
* This interface is used to add metadata to the records fetched from the database. For instance, in
32+
* Postgres we add the lsn to the records. In MySql we add the file name and position to the
33+
* records.
34+
*/
35+
public interface CdcMetadataInjector {
36+
37+
/**
38+
* A debezium record contains multiple pieces. Ref :
39+
* https://debezium.io/documentation/reference/1.4/connectors/mysql.html#mysql-create-events
40+
*
41+
* @param event is the actual record which contains data and would be written to the destination
42+
* @param source contains the metadata about the record and we need to extract that metadata and add
43+
* it to the event before writing it to destination
44+
*/
45+
void addMetaData(ObjectNode event, JsonNode source);
46+
47+
/**
48+
* As part of Airbyte record we need to add the namespace (schema name)
49+
*
50+
* @param source part of debezium record and contains the metadata about the record. We need to
51+
* extract namespace out of this metadata and return Ref :
52+
* https://debezium.io/documentation/reference/1.4/connectors/mysql.html#mysql-create-events
53+
*/
54+
String namespace(JsonNode source);
55+
56+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2020 Airbyte
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package io.airbyte.integrations.debezium;
26+
27+
import com.fasterxml.jackson.databind.JsonNode;
28+
import java.util.Optional;
29+
30+
/**
31+
* This interface is used to fetch the saved info required for debezium to run incrementally. Each
32+
* connector saves offset and schema history in different manner
33+
*/
34+
public interface CdcSavedInfoFetcher {
35+
36+
JsonNode getSavedOffset();
37+
38+
Optional<JsonNode> getSavedSchemaHistory();
39+
40+
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2020 Airbyte
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package io.airbyte.integrations.debezium;
26+
27+
import io.airbyte.protocol.models.AirbyteMessage;
28+
import java.util.Map;
29+
30+
/**
31+
* This interface is used to allow connectors to save the offset and schema history in the manner
32+
* which suits them
33+
*/
34+
@FunctionalInterface
35+
public interface CdcStateHandler {
36+
37+
AirbyteMessage saveState(Map<String, String> offset, String dbHistory);
38+
39+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* MIT License
3+
*
4+
* Copyright (c) 2020 Airbyte
5+
*
6+
* Permission is hereby granted, free of charge, to any person obtaining a copy
7+
* of this software and associated documentation files (the "Software"), to deal
8+
* in the Software without restriction, including without limitation the rights
9+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10+
* copies of the Software, and to permit persons to whom the Software is
11+
* furnished to do so, subject to the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be included in all
14+
* copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22+
* SOFTWARE.
23+
*/
24+
25+
package io.airbyte.integrations.debezium;
26+
27+
import com.fasterxml.jackson.databind.JsonNode;
28+
29+
/**
30+
* This interface is used to define the target position at the beginning of the sync so that once we
31+
* reach the desired target, we can shutdown the sync. This is needed because it might happen that
32+
* while we are syncing the data, new changes are being made in the source database and as a result
33+
* we might end up syncing forever. In order to tackle that, we need to define a point to end at the
34+
* beginning of the sync
35+
*/
36+
public interface CdcTargetPosition {
37+
38+
boolean reachedTargetPosition(JsonNode valueAsJson);
39+
40+
}

0 commit comments

Comments
 (0)