Commit 7dfc38d

source-postgres-cdc: allow LSN to be acknowledged after sync (#20469)
* WIP
* fix issues
* fix test
* format
* more tests
* one more test
* make lsn_commit_behaviour not required
* update expected spec
* update expected spec
* update expected spec
* PR comments
* PR comments
* upgrade version
* auto-bump connector version

Co-authored-by: Octavia Squidington III <[email protected]>
1 parent 6a352cf commit 7dfc38d

19 files changed, +632 -166 lines changed

airbyte-config/init/src/main/resources/seed/source_definitions.yaml

Lines changed: 1 addition & 1 deletion
@@ -1258,7 +1258,7 @@
 - name: Postgres
   sourceDefinitionId: decd338e-5647-4c0b-adf4-da0e75f5a750
   dockerRepository: airbyte/source-postgres
-  dockerImageTag: 1.0.34
+  dockerImageTag: 1.0.35
   documentationUrl: https://docs.airbyte.com/integrations/sources/postgres
   icon: postgresql.svg
   sourceType: database

airbyte-config/init/src/main/resources/seed/source_specs.yaml

Lines changed: 15 additions & 1 deletion
@@ -11364,7 +11364,7 @@
     supportsNormalization: false
     supportsDBT: false
     supported_destination_sync_modes: []
-- dockerImage: "airbyte/source-postgres:1.0.34"
+- dockerImage: "airbyte/source-postgres:1.0.35"
   spec:
     documentationUrl: "https://docs.airbyte.com/integrations/sources/postgres"
     connectionSpecification:
@@ -11592,6 +11592,7 @@
             - "method"
             - "replication_slot"
             - "publication"
+            additionalProperties: true
             properties:
               method:
                 type: "string"
@@ -11636,6 +11637,19 @@
                 order: 5
                 min: 120
                 max: 1200
+              lsn_commit_behaviour:
+                type: "string"
+                title: "LSN commit behaviour"
+                description: "Determines when Airbtye should flush the LSN of processed\
+                  \ WAL logs in the source database. `After loading Data in the destination`\
+                  \ is default. If `While reading Data` is selected, in case of a\
+                  \ downstream failure (while loading data into the destination),\
+                  \ next sync would result in a full sync."
+                enum:
+                - "While reading Data"
+                - "After loading Data in the destination"
+                default: "After loading Data in the destination"
+                order: 6
         tunnel_method:
           type: "object"
          title: "SSH Tunnel Method"

airbyte-integrations/bases/debezium-v1-9-6/build.gradle

Lines changed: 5 additions & 1 deletion
@@ -13,12 +13,16 @@ dependencies {
     implementation 'io.debezium:debezium-embedded:1.9.6.Final'
     implementation 'io.debezium:debezium-connector-sqlserver:1.9.6.Final'
     implementation 'io.debezium:debezium-connector-mysql:1.9.6.Final'
-    implementation 'io.debezium:debezium-connector-postgres:1.9.6.Final'
+    implementation files('debezium-connector-postgres-1.9.6.Final.jar')
     implementation 'org.codehaus.plexus:plexus-utils:3.4.2'
 
     testFixturesImplementation project(':airbyte-db:db-lib')
     testFixturesImplementation project(':airbyte-integrations:bases:base-java')
 
+    testImplementation project(':airbyte-test-utils')
+    testImplementation libs.connectors.testcontainers.jdbc
+    testImplementation libs.connectors.testcontainers.postgresql
+
     testFixturesImplementation 'org.junit.jupiter:junit-jupiter-engine:5.4.2'
     testFixturesImplementation 'org.junit.jupiter:junit-jupiter-api:5.4.2'
     testFixturesImplementation 'org.junit.jupiter:junit-jupiter-params:5.4.2'

airbyte-integrations/bases/debezium-v1-9-6/src/main/java/io/airbyte/integrations/debezium/internals/PostgresDebeziumStateUtil.java

Lines changed: 130 additions & 19 deletions
@@ -8,6 +8,7 @@
 import static io.debezium.connector.postgresql.SourceInfo.LSN_KEY;
 
 import com.fasterxml.jackson.databind.JsonNode;
+import io.airbyte.db.jdbc.JdbcUtils;
 import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
 import io.debezium.config.Configuration;
 import io.debezium.connector.common.OffsetReader;
@@ -16,10 +17,18 @@
 import io.debezium.connector.postgresql.PostgresOffsetContext.Loader;
 import io.debezium.connector.postgresql.PostgresPartition;
 import io.debezium.connector.postgresql.connection.Lsn;
+import io.debezium.jdbc.JdbcConnection.ResultSetMapper;
+import io.debezium.jdbc.JdbcConnection.StatementFactory;
 import io.debezium.pipeline.spi.Offsets;
 import io.debezium.pipeline.spi.Partition;
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.SQLException;
+import java.sql.Statement;
 import java.util.Collections;
 import java.util.Map;
+import java.util.Objects;
 import java.util.Optional;
 import java.util.OptionalLong;
 import java.util.Properties;
@@ -29,6 +38,10 @@
 import org.apache.kafka.connect.runtime.standalone.StandaloneConfig;
 import org.apache.kafka.connect.storage.FileOffsetBackingStore;
 import org.apache.kafka.connect.storage.OffsetStorageReaderImpl;
+import org.postgresql.core.BaseConnection;
+import org.postgresql.replication.LogSequenceNumber;
+import org.postgresql.replication.PGReplicationStream;
+import org.postgresql.replication.fluent.logical.ChainedLogicalStreamBuilder;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,33 +53,131 @@ public class PostgresDebeziumStateUtil {
 
   private static final Logger LOGGER = LoggerFactory.getLogger(PostgresDebeziumStateUtil.class);
 
-  public boolean isSavedOffsetAfterReplicationSlotLSN(final Properties baseProperties,
-                                                      final ConfiguredAirbyteCatalog catalog,
-                                                      final JsonNode cdcState,
-                                                      final JsonNode replicationSlot,
-                                                      final JsonNode config) {
+  public boolean isSavedOffsetAfterReplicationSlotLSN(final JsonNode replicationSlot,
+                                                      final OptionalLong savedOffset) {
 
+    if (Objects.isNull(savedOffset) || savedOffset.isEmpty()) {
+      return true;
+    }
+
+    if (replicationSlot.has("confirmed_flush_lsn")) {
+      final long confirmedFlushLsnOnServerSide = Lsn.valueOf(replicationSlot.get("confirmed_flush_lsn").asText()).asLong();
+      LOGGER.info("Replication slot confirmed_flush_lsn : " + confirmedFlushLsnOnServerSide + " Saved offset LSN : " + savedOffset.getAsLong());
+      return savedOffset.getAsLong() >= confirmedFlushLsnOnServerSide;
+    } else if (replicationSlot.has("restart_lsn")) {
+      final long restartLsn = Lsn.valueOf(replicationSlot.get("restart_lsn").asText()).asLong();
+      LOGGER.info("Replication slot restart_lsn : " + restartLsn + " Saved offset LSN : " + savedOffset.getAsLong());
+      return savedOffset.getAsLong() >= restartLsn;
+    }
+
+    // We return true when saved offset is not present cause using an empty offset would result in sync
+    // from scratch anyway
+    return true;
+  }
+
+  public OptionalLong savedOffset(final Properties baseProperties,
+                                  final ConfiguredAirbyteCatalog catalog,
+                                  final JsonNode cdcState,
+                                  final JsonNode config) {
     final DebeziumPropertiesManager debeziumPropertiesManager = new DebeziumPropertiesManager(baseProperties, config, catalog,
         AirbyteFileOffsetBackingStore.initializeState(cdcState),
         Optional.empty());
     final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties();
-    final OptionalLong savedOffset = parseSavedOffset(debeziumProperties);
-
-    if (savedOffset.isPresent()) {
-      if (replicationSlot.has("confirmed_flush_lsn")) {
-        final long confirmedFlushLsnOnServerSide = Lsn.valueOf(replicationSlot.get("confirmed_flush_lsn").asText()).asLong();
-        LOGGER.info("Replication slot confirmed_flush_lsn : " + confirmedFlushLsnOnServerSide + " Saved offset LSN : " + savedOffset.getAsLong());
-        return savedOffset.getAsLong() >= confirmedFlushLsnOnServerSide;
-      } else if (replicationSlot.has("restart_lsn")) {
-        final long restartLsn = Lsn.valueOf(replicationSlot.get("restart_lsn").asText()).asLong();
-        LOGGER.info("Replication slot restart_lsn : " + restartLsn + " Saved offset LSN : " + savedOffset.getAsLong());
-        return savedOffset.getAsLong() >= restartLsn;
+    return parseSavedOffset(debeziumProperties);
+  }
+
+  private Connection connection(final JsonNode jdbcConfig) throws SQLException {
+    Properties properties = new Properties();
+    properties.setProperty("user", jdbcConfig.has(JdbcUtils.USERNAME_KEY) ? jdbcConfig.get(JdbcUtils.USERNAME_KEY).asText()
+        : null);
+    properties.setProperty("password", jdbcConfig.has(JdbcUtils.PASSWORD_KEY) ? jdbcConfig.get(JdbcUtils.PASSWORD_KEY).asText()
+        : null);
+    properties.setProperty("assumeMinServerVersion", "9.4");
+    properties.setProperty("ApplicationName", "Airbyte Debezium Streaming");
+    properties.setProperty("replication", "database");
+    properties.setProperty("preferQueryMode", "simple"); // replication protocol only supports simple query mode
+
+    return DriverManager.getConnection(jdbcConfig.get(JdbcUtils.JDBC_URL_KEY).asText(), properties);
+  }
+
+  public void commitLSNToPostgresDatabase(final JsonNode jdbcConfig,
+                                          final OptionalLong savedOffset,
+                                          final String slotName,
+                                          final String publicationName,
+                                          final String plugin) {
+    if (Objects.isNull(savedOffset) || savedOffset.isEmpty()) {
+      return;
+    }
+
+    final LogSequenceNumber logSequenceNumber = LogSequenceNumber.valueOf(savedOffset.getAsLong());
+
+    try (final BaseConnection pgConnection = ((BaseConnection) connection(jdbcConfig))) {
+      validateReplicationConnection(pgConnection);
+
+      ChainedLogicalStreamBuilder streamBuilder = pgConnection
+          .getReplicationAPI()
+          .replicationStream()
+          .logical()
+          .withSlotName("\"" + slotName + "\"")
+          .withStartPosition(logSequenceNumber);
+
+      streamBuilder = addSlotOption(publicationName, plugin, pgConnection, streamBuilder);
+
+      try (final PGReplicationStream stream = streamBuilder.start()) {
+        stream.forceUpdateStatus();
+
+        stream.setFlushedLSN(logSequenceNumber);
+        stream.setAppliedLSN(logSequenceNumber);
+
+        stream.forceUpdateStatus();
       }
+    } catch (SQLException e) {
+      throw new RuntimeException(e);
     }
+  }
 
-    // We return true when saved offset is not present cause using an empty offset would result in sync
-    // from scratch anyway
-    return true;
+  private ChainedLogicalStreamBuilder addSlotOption(final String publicationName,
+                                                    final String plugin,
+                                                    final BaseConnection pgConnection,
+                                                    ChainedLogicalStreamBuilder streamBuilder) {
+    if (plugin.equalsIgnoreCase("pgoutput")) {
+      streamBuilder = streamBuilder.withSlotOption("proto_version", 1)
+          .withSlotOption("publication_names", publicationName);
+
+      if (pgConnection.haveMinimumServerVersion(140000)) {
+        streamBuilder = streamBuilder.withSlotOption("messages", true);
+      }
+    } else if (plugin.equalsIgnoreCase("wal2json")) {
+      streamBuilder = streamBuilder
+          .withSlotOption("pretty-print", 1)
+          .withSlotOption("write-in-chunks", 1)
+          .withSlotOption("include-xids", 1)
+          .withSlotOption("include-timestamp", 1)
+          .withSlotOption("include-not-null", "true");
+    } else {
+      throw new RuntimeException("Unknown plugin value : " + plugin);
+    }
+    return streamBuilder;
+  }
+
+  private void validateReplicationConnection(final BaseConnection pgConnection) throws SQLException {
+    final Lsn xlogStart = queryAndMap(pgConnection, "IDENTIFY_SYSTEM", Connection::createStatement, rs -> {
+      if (!rs.next()) {
+        throw new IllegalStateException("The DB connection is not a valid replication connection");
+      }
+      String xlogpos = rs.getString("xlogpos");
+      return Lsn.valueOf(xlogpos);
+    });
+  }
+
+  private <T> T queryAndMap(final Connection conn, final String query, final StatementFactory statementFactory, final ResultSetMapper<T> mapper)
+      throws SQLException {
+    Objects.requireNonNull(mapper, "Mapper must be provided");
+    try (Statement statement = statementFactory.createStatement(conn)) {
+      try (ResultSet resultSet = statement.executeQuery(query);) {
+        return mapper.apply(resultSet);
+      }
+    }
   }
 
   /**
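
Taken together, this refactor splits the old combined check into three public entry points: savedOffset parses the LSN out of Airbyte's stored CDC state, isSavedOffsetAfterReplicationSlotLSN compares it with the replication slot's confirmed_flush_lsn or restart_lsn, and commitLSNToPostgresDatabase flushes that LSN back to the source over a replication stream once it is safe to acknowledge. The sketch below shows one way calling code might wire these together; the wrapper class and the onSyncFinished hook are illustrative assumptions, and only the PostgresDebeziumStateUtil signatures come from this diff.

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
import java.util.OptionalLong;
import java.util.Properties;

// Illustrative wrapper only; the real orchestration lives elsewhere in the connector.
class LsnAcknowledgementSketch {

  private final PostgresDebeziumStateUtil stateUtil = new PostgresDebeziumStateUtil();

  boolean canReuseSavedState(final Properties debeziumProps,
                             final ConfiguredAirbyteCatalog catalog,
                             final JsonNode cdcState,
                             final JsonNode replicationSlot,
                             final JsonNode config) {
    // Parse the LSN stored in Airbyte's CDC state, then check that the replication
    // slot has not already advanced past it (otherwise a fresh sync is needed).
    final OptionalLong savedOffset = stateUtil.savedOffset(debeziumProps, catalog, cdcState, config);
    return stateUtil.isSavedOffsetAfterReplicationSlotLSN(replicationSlot, savedOffset);
  }

  // Hypothetical hook: with "After loading Data in the destination", the LSN is only
  // acknowledged to Postgres after the destination has committed the records.
  void onSyncFinished(final JsonNode jdbcConfig,
                      final OptionalLong savedOffset,
                      final String slotName,
                      final String publicationName,
                      final String plugin) {
    stateUtil.commitLSNToPostgresDatabase(jdbcConfig, savedOffset, slotName, publicationName, plugin);
  }

}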
