Skip to content

Commit 9dee837

Browse files
authored
[Source-postgres] : Source operations suport for meta column (#36432)
1 parent 0f6214a commit 9dee837

File tree

16 files changed

+179
-107
lines changed

16 files changed

+179
-107
lines changed

airbyte-cdk/java/airbyte-cdk/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ Maven and Gradle will automatically reference the correct (pinned) version of th
144144

145145
| Version | Date | Pull Request | Subject |
146146
|:--------|:-----------|:-----------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------|
147+
| 0.27.6 | 2024-03-26 | [\#36432](https://github.com/airbytehq/airbyte/pull/36432) | Sources support for AirbyteRecordMessageMeta during reading source data types. |
147148
| 0.27.5 | 2024-03-25 | [\#36461](https://github.com/airbytehq/airbyte/pull/36461) | Destinations: Handle case-sensitive columns in destination state handling. |
148149
| 0.27.4 | 2024-03-25 | [\#36333](https://github.com/airbytehq/airbyte/pull/36333) | Sunset DebeziumSourceDecoratingIterator. |
149150
| 0.27.1 | 2024-03-22 | [\#36296](https://github.com/airbytehq/airbyte/pull/36296) | Destinations: (async framework) Do not log invalid message data. |

airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/JdbcCompatibleSourceOperations.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,15 @@
66

77
import com.fasterxml.jackson.databind.JsonNode;
88
import com.fasterxml.jackson.databind.node.ObjectNode;
9+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
910
import java.sql.PreparedStatement;
1011
import java.sql.ResultSet;
1112
import java.sql.SQLException;
1213

1314
public interface JdbcCompatibleSourceOperations<SourceType> extends SourceOperations<ResultSet, SourceType> {
1415

16+
AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException;
17+
1518
/**
1619
* Read from a result set, and copy the value of the column at colIndex to the Json object.
1720
* <p/>

airbyte-cdk/java/airbyte-cdk/core/src/main/java/io/airbyte/cdk/db/jdbc/AbstractJdbcCompatibleSourceOperations.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
import io.airbyte.cdk.db.DataTypeUtils;
1414
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
1515
import io.airbyte.commons.json.Jsons;
16+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta;
17+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange;
18+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Change;
19+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason;
1620
import java.math.BigDecimal;
1721
import java.sql.Date;
1822
import java.sql.PreparedStatement;
@@ -28,19 +32,48 @@
2832
import java.time.OffsetTime;
2933
import java.time.chrono.IsoEra;
3034
import java.time.format.DateTimeParseException;
35+
import java.util.ArrayList;
3136
import java.util.Base64;
3237
import java.util.Collections;
38+
import java.util.List;
39+
import org.slf4j.LoggerFactory;
3340

3441
/**
3542
* Source operation skeleton for JDBC compatible databases.
3643
*/
3744
public abstract class AbstractJdbcCompatibleSourceOperations<Datatype> implements JdbcCompatibleSourceOperations<Datatype> {
3845

46+
private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(AbstractJdbcCompatibleSourceOperations.class);
47+
3948
/**
4049
* A Date representing the earliest date in CE. Any date before this is in BCE.
4150
*/
4251
private static final Date ONE_CE = Date.valueOf("0001-01-01");
4352

53+
public AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException {
54+
// the first call communicates with the database. after that the result is cached.
55+
final int columnCount = queryContext.getMetaData().getColumnCount();
56+
final ObjectNode jsonNode = (ObjectNode) Jsons.jsonNode(Collections.emptyMap());
57+
final List<AirbyteRecordMessageMetaChange> metaChanges = new ArrayList<>();
58+
59+
for (int i = 1; i <= columnCount; i++) {
60+
final String columnName = queryContext.getMetaData().getColumnName(i);
61+
try {
62+
// convert to java types that will convert into reasonable json.
63+
copyToJsonField(queryContext, i, jsonNode);
64+
} catch (Exception e) {
65+
LOGGER.info("Failed to serialize column: {}, with error {}", columnName, e.getMessage());
66+
metaChanges.add(
67+
new AirbyteRecordMessageMetaChange()
68+
.withField(columnName)
69+
.withChange(Change.NULLED)
70+
.withReason(Reason.SOURCE_SERIALIZATION_ERROR));
71+
}
72+
}
73+
74+
return new AirbyteRecordData(jsonNode, new AirbyteRecordMessageMeta().withChanges(metaChanges));
75+
}
76+
4477
@Override
4578
public JsonNode rowToJson(final ResultSet queryContext) throws SQLException {
4679
// the first call communicates with the database. after that the result is cached.
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
/*
2+
* Copyright (c) 2024 Airbyte, Inc., all rights reserved.
3+
*/
4+
5+
package io.airbyte.cdk.db.jdbc;
6+
7+
import com.fasterxml.jackson.databind.JsonNode;
8+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta;
9+
10+
public record AirbyteRecordData(JsonNode rawRowData, AirbyteRecordMessageMeta meta) {}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
version=0.27.5
1+
version=0.27.6

airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/debezium/CdcMetadataInjector.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,15 @@ public interface CdcMetadataInjector<T> {
2424
*/
2525
void addMetaData(ObjectNode event, JsonNode source);
2626

27+
// TODO : Remove this - it is deprecated.
2728
default void addMetaDataToRowsFetchedOutsideDebezium(final ObjectNode record, final String transactionTimestamp, final T metadataToAdd) {
2829
throw new RuntimeException("Not Supported");
2930
}
3031

32+
default void addMetaDataToRowsFetchedOutsideDebezium(final ObjectNode record) {
33+
throw new RuntimeException("Not Supported");
34+
}
35+
3136
/**
3237
* As part of Airbyte record we need to add the namespace (schema name)
3338
*

airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/source/jdbc/AbstractJdbcSource.java

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifier;
2626
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.enquoteIdentifierList;
2727
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.getFullyQualifiedTableNameWithQuoting;
28-
import static io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils.queryTable;
2928

3029
import com.fasterxml.jackson.databind.JsonNode;
3130
import com.google.common.annotations.VisibleForTesting;
@@ -36,6 +35,7 @@
3635
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
3736
import io.airbyte.cdk.db.SqlDatabase;
3837
import io.airbyte.cdk.db.factory.DataSourceFactory;
38+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
3939
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
4040
import io.airbyte.cdk.db.jdbc.JdbcUtils;
4141
import io.airbyte.cdk.db.jdbc.StreamingJdbcDatabase;
@@ -104,29 +104,43 @@ public AbstractJdbcSource(final String driverClass,
104104
}
105105

106106
@Override
107-
protected AutoCloseableIterator<JsonNode> queryTableFullRefresh(final JdbcDatabase database,
108-
final List<String> columnNames,
109-
final String schemaName,
110-
final String tableName,
111-
final SyncMode syncMode,
112-
final Optional<String> cursorField) {
107+
protected AutoCloseableIterator<AirbyteRecordData> queryTableFullRefresh(final JdbcDatabase database,
108+
final List<String> columnNames,
109+
final String schemaName,
110+
final String tableName,
111+
final SyncMode syncMode,
112+
final Optional<String> cursorField) {
113113
LOGGER.info("Queueing query for table: {}", tableName);
114-
// This corresponds to the initial sync for in INCREMENTAL_MODE, where the ordering of the records
115-
// matters
116-
// as intermediate state messages are emitted (if the connector emits intermediate state).
117-
if (syncMode.equals(SyncMode.INCREMENTAL) && getStateEmissionFrequency() > 0) {
118-
final String quotedCursorField = enquoteIdentifier(cursorField.get(), getQuoteString());
119-
return queryTable(database, String.format("SELECT %s FROM %s ORDER BY %s ASC",
120-
enquoteIdentifierList(columnNames, getQuoteString()),
121-
getFullyQualifiedTableNameWithQuoting(schemaName, tableName, getQuoteString()), quotedCursorField),
122-
tableName, schemaName);
123-
} else {
124-
// If we are in FULL_REFRESH mode, state messages are never emitted, so we don't care about ordering
125-
// of the records.
126-
return queryTable(database, String.format("SELECT %s FROM %s",
127-
enquoteIdentifierList(columnNames, getQuoteString()),
128-
getFullyQualifiedTableNameWithQuoting(schemaName, tableName, getQuoteString())), tableName, schemaName);
129-
}
114+
final io.airbyte.protocol.models.AirbyteStreamNameNamespacePair airbyteStream =
115+
AirbyteStreamUtils.convertFromNameAndNamespace(tableName, schemaName);
116+
return AutoCloseableIterators.lazyIterator(() -> {
117+
try {
118+
final Stream<AirbyteRecordData> stream = database.unsafeQuery(
119+
connection -> {
120+
LOGGER.info("Preparing query for table: {}", tableName);
121+
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName, getQuoteString());
122+
123+
final String wrappedColumnNames = getWrappedColumnNames(database, connection, columnNames, schemaName, tableName);
124+
final StringBuilder sql = new StringBuilder(String.format("SELECT %s FROM %s",
125+
wrappedColumnNames,
126+
fullTableName));
127+
// if the connector emits intermediate states, the incremental query must be sorted by the cursor
128+
// field
129+
if (syncMode.equals(SyncMode.INCREMENTAL) && getStateEmissionFrequency() > 0) {
130+
final String quotedCursorField = enquoteIdentifier(cursorField.get(), getQuoteString());
131+
sql.append(String.format(" ORDER BY %s ASC", quotedCursorField));
132+
}
133+
134+
final PreparedStatement preparedStatement = connection.prepareStatement(sql.toString());
135+
LOGGER.info("Executing query for table {}: {}", tableName, preparedStatement);
136+
return preparedStatement;
137+
},
138+
sourceOperations::convertDatabaseRowToAirbyteRecordData);
139+
return AutoCloseableIterators.fromStream(stream, airbyteStream);
140+
} catch (final SQLException e) {
141+
throw new RuntimeException(e);
142+
}
143+
}, airbyteStream);
130144
}
131145

132146
/**
@@ -322,18 +336,18 @@ public boolean isCursorType(final Datatype type) {
322336
}
323337

324338
@Override
325-
public AutoCloseableIterator<JsonNode> queryTableIncremental(final JdbcDatabase database,
326-
final List<String> columnNames,
327-
final String schemaName,
328-
final String tableName,
329-
final CursorInfo cursorInfo,
330-
final Datatype cursorFieldType) {
339+
public AutoCloseableIterator<AirbyteRecordData> queryTableIncremental(final JdbcDatabase database,
340+
final List<String> columnNames,
341+
final String schemaName,
342+
final String tableName,
343+
final CursorInfo cursorInfo,
344+
final Datatype cursorFieldType) {
331345
LOGGER.info("Queueing query for table: {}", tableName);
332346
final io.airbyte.protocol.models.AirbyteStreamNameNamespacePair airbyteStream =
333347
AirbyteStreamUtils.convertFromNameAndNamespace(tableName, schemaName);
334348
return AutoCloseableIterators.lazyIterator(() -> {
335349
try {
336-
final Stream<JsonNode> stream = database.unsafeQuery(
350+
final Stream<AirbyteRecordData> stream = database.unsafeQuery(
337351
connection -> {
338352
LOGGER.info("Preparing query for table: {}", tableName);
339353
final String fullTableName = getFullyQualifiedTableNameWithQuoting(schemaName, tableName, getQuoteString());
@@ -370,7 +384,7 @@ public AutoCloseableIterator<JsonNode> queryTableIncremental(final JdbcDatabase
370384
sourceOperations.setCursorField(preparedStatement, 1, cursorFieldType, cursorInfo.getCursor());
371385
return preparedStatement;
372386
},
373-
sourceOperations::rowToJson);
387+
sourceOperations::convertDatabaseRowToAirbyteRecordData);
374388
return AutoCloseableIterators.fromStream(stream, airbyteStream);
375389
} catch (final SQLException e) {
376390
throw new RuntimeException(e);

airbyte-cdk/java/airbyte-cdk/db-sources/src/main/java/io/airbyte/cdk/integrations/source/relationaldb/AbstractDbSource.java

Lines changed: 24 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
import datadog.trace.api.Trace;
1313
import io.airbyte.cdk.db.AbstractDatabase;
1414
import io.airbyte.cdk.db.IncrementalUtils;
15+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
1516
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
1617
import io.airbyte.cdk.integrations.JdbcConnector;
1718
import io.airbyte.cdk.integrations.base.AirbyteTraceMessageUtility;
@@ -43,6 +44,7 @@
4344
import io.airbyte.protocol.models.v0.AirbyteMessage;
4445
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
4546
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
47+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta;
4648
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
4749
import io.airbyte.protocol.models.v0.AirbyteStream;
4850
import io.airbyte.protocol.models.v0.AirbyteStreamNameNamespacePair;
@@ -466,7 +468,7 @@ private AutoCloseableIterator<AirbyteMessage> getIncrementalStream(final Databas
466468
table.getFields().stream().anyMatch(f -> f.getName().equals(cursorField)),
467469
String.format("Could not find cursor field %s in table %s", cursorField, table.getName()));
468470

469-
final AutoCloseableIterator<JsonNode> queryIterator = queryTableIncremental(
471+
final AutoCloseableIterator<AirbyteRecordData> queryIterator = queryTableIncremental(
470472
database,
471473
selectedDatabaseFields,
472474
table.getNameSpace(),
@@ -498,26 +500,31 @@ private AutoCloseableIterator<AirbyteMessage> getFullRefreshStream(final Databas
498500
final Instant emittedAt,
499501
final SyncMode syncMode,
500502
final Optional<String> cursorField) {
501-
final AutoCloseableIterator<JsonNode> queryStream =
503+
final AutoCloseableIterator<AirbyteRecordData> queryStream =
502504
queryTableFullRefresh(database, selectedDatabaseFields, table.getNameSpace(),
503505
table.getName(), syncMode, cursorField);
504506
return getMessageIterator(queryStream, streamName, namespace, emittedAt.toEpochMilli());
505507
}
506508

507509
private static AutoCloseableIterator<AirbyteMessage> getMessageIterator(
508-
final AutoCloseableIterator<JsonNode> recordIterator,
510+
final AutoCloseableIterator<AirbyteRecordData> recordIterator,
509511
final String streamName,
510512
final String namespace,
511513
final long emittedAt) {
512514
return AutoCloseableIterators.transform(recordIterator,
513515
new io.airbyte.protocol.models.AirbyteStreamNameNamespacePair(streamName, namespace),
514-
r -> new AirbyteMessage()
516+
airbyteRecordData -> new AirbyteMessage()
515517
.withType(Type.RECORD)
516518
.withRecord(new AirbyteRecordMessage()
517519
.withStream(streamName)
518520
.withNamespace(namespace)
519521
.withEmittedAt(emittedAt)
520-
.withData(r)));
522+
.withData(airbyteRecordData.rawRowData())
523+
.withMeta(isMetaChangesEmptyOrNull(airbyteRecordData.meta()) ? null : airbyteRecordData.meta())));
524+
}
525+
526+
private static boolean isMetaChangesEmptyOrNull(AirbyteRecordMessageMeta meta) {
527+
return meta == null || meta.getChanges() == null || meta.getChanges().isEmpty();
521528
}
522529

523530
/**
@@ -649,12 +656,12 @@ protected abstract Map<String, List<String>> discoverPrimaryKeys(Database databa
649656
* @param syncMode The sync mode that this full refresh stream should be associated with.
650657
* @return iterator with read data
651658
*/
652-
protected abstract AutoCloseableIterator<JsonNode> queryTableFullRefresh(final Database database,
653-
final List<String> columnNames,
654-
final String schemaName,
655-
final String tableName,
656-
final SyncMode syncMode,
657-
final Optional<String> cursorField);
659+
protected abstract AutoCloseableIterator<AirbyteRecordData> queryTableFullRefresh(final Database database,
660+
final List<String> columnNames,
661+
final String schemaName,
662+
final String tableName,
663+
final SyncMode syncMode,
664+
final Optional<String> cursorField);
658665

659666
/**
660667
* Read incremental data from a table. Incremental read should return only records where cursor
@@ -664,12 +671,12 @@ protected abstract AutoCloseableIterator<JsonNode> queryTableFullRefresh(final D
664671
*
665672
* @return iterator with read data
666673
*/
667-
protected abstract AutoCloseableIterator<JsonNode> queryTableIncremental(Database database,
668-
List<String> columnNames,
669-
String schemaName,
670-
String tableName,
671-
CursorInfo cursorInfo,
672-
DataType cursorFieldType);
674+
protected abstract AutoCloseableIterator<AirbyteRecordData> queryTableIncremental(Database database,
675+
List<String> columnNames,
676+
String schemaName,
677+
String tableName,
678+
CursorInfo cursorInfo,
679+
DataType cursorFieldType);
673680

674681
/**
675682
* When larger than 0, the incremental iterator will emit intermediate state for every N records.

airbyte-integrations/connectors/source-postgres/build.gradle

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ java {
1212
}
1313

1414
airbyteJavaConnector {
15-
cdkVersionRequired = '0.27.4'
15+
cdkVersionRequired = '0.27.6'
1616
features = ['db-sources', 'datastore-postgres']
1717
useLocalCdk = false
1818
}

0 commit comments

Comments
 (0)