Skip to content

Commit cb9eae3

Browse files
authored
[Source-mysql] : Add meta error handling in initial load path (#37328)
1 parent ca394d2 commit cb9eae3

File tree

6 files changed

+101
-16
lines changed

6 files changed

+101
-16
lines changed

airbyte-integrations/connectors/source-mysql/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ data:
99
connectorSubtype: database
1010
connectorType: source
1111
definitionId: 435bb9a5-7887-4809-aa58-28c27df0d7ad
12-
dockerImageTag: 3.3.18
12+
dockerImageTag: 3.3.19
1313
dockerRepository: airbyte/source-mysql
1414
documentationUrl: https://docs.airbyte.com/integrations/sources/mysql
1515
githubIssueLabel: source-mysql

airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/MySqlSourceOperations.java

+6-4
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@
4646
import com.mysql.cj.result.Field;
4747
import io.airbyte.cdk.db.SourceOperations;
4848
import io.airbyte.cdk.db.jdbc.AbstractJdbcCompatibleSourceOperations;
49+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
4950
import io.airbyte.integrations.source.mysql.initialsync.CdcMetadataInjector;
5051
import io.airbyte.protocol.models.JsonSchemaType;
5152
import java.sql.PreparedStatement;
@@ -81,13 +82,14 @@ public MySqlSourceOperations(final Optional<CdcMetadataInjector> metadataInjecto
8182
}
8283

8384
@Override
84-
public JsonNode rowToJson(final ResultSet queryContext) throws SQLException {
85-
final ObjectNode jsonNode = (ObjectNode) super.rowToJson(queryContext);
85+
public AirbyteRecordData convertDatabaseRowToAirbyteRecordData(final ResultSet queryContext) throws SQLException {
86+
final AirbyteRecordData recordData = super.convertDatabaseRowToAirbyteRecordData(queryContext);
87+
final ObjectNode jsonNode = (ObjectNode) recordData.rawRowData();
8688
if (!metadataInjector.isPresent()) {
87-
return jsonNode;
89+
return recordData;
8890
}
8991
metadataInjector.get().inject(jsonNode);
90-
return jsonNode;
92+
return new AirbyteRecordData(jsonNode, recordData.meta());
9193
}
9294

9395
/**

airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialLoadHandler.java

+10-3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import com.fasterxml.jackson.databind.JsonNode;
1111
import com.google.common.annotations.VisibleForTesting;
1212
import com.mysql.cj.MysqlType;
13+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
1314
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
1415
import io.airbyte.cdk.integrations.debezium.DebeziumIteratorConstants;
1516
import io.airbyte.cdk.integrations.source.relationaldb.DbSourceDiscoverUtil;
@@ -27,6 +28,7 @@
2728
import io.airbyte.protocol.models.v0.AirbyteMessage;
2829
import io.airbyte.protocol.models.v0.AirbyteMessage.Type;
2930
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
31+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta;
3032
import io.airbyte.protocol.models.v0.AirbyteStream;
3133
import io.airbyte.protocol.models.v0.CatalogHelpers;
3234
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
@@ -110,7 +112,7 @@ public List<AutoCloseableIterator<AirbyteMessage>> getIncrementalIterators(
110112
}
111113
});
112114

113-
final AutoCloseableIterator<JsonNode> queryStream =
115+
final AutoCloseableIterator<AirbyteRecordData> queryStream =
114116
new MySqlInitialLoadRecordIterator(database, sourceOperations, quoteString, initialLoadStateManager, selectedDatabaseFields, pair,
115117
calculateChunkSize(tableSizeInfoMap.get(pair), pair), isCompositePrimaryKey(airbyteStream));
116118
final AutoCloseableIterator<AirbyteMessage> recordIterator =
@@ -144,7 +146,7 @@ public static long calculateChunkSize(final TableSizeInfo tableSizeInfo, final A
144146

145147
// Transforms the given iterator to create an {@link AirbyteRecordMessage}
146148
private AutoCloseableIterator<AirbyteMessage> getRecordIterator(
147-
final AutoCloseableIterator<JsonNode> recordIterator,
149+
final AutoCloseableIterator<AirbyteRecordData> recordIterator,
148150
final String streamName,
149151
final String namespace,
150152
final long emittedAt) {
@@ -154,7 +156,12 @@ private AutoCloseableIterator<AirbyteMessage> getRecordIterator(
154156
.withStream(streamName)
155157
.withNamespace(namespace)
156158
.withEmittedAt(emittedAt)
157-
.withData(r)));
159+
.withData(r.rawRowData())
160+
.withMeta(isMetaChangesEmptyOrNull(r.meta()) ? null : r.meta())));
161+
}
162+
163+
private boolean isMetaChangesEmptyOrNull(AirbyteRecordMessageMeta meta) {
164+
return meta == null || meta.getChanges() == null || meta.getChanges().isEmpty();
158165
}
159166

160167
// Augments the given iterator with record count logs.

airbyte-integrations/connectors/source-mysql/src/main/java/io/airbyte/integrations/source/mysql/initialsync/MySqlInitialLoadRecordIterator.java

+7-7
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,10 @@
44

55
package io.airbyte.integrations.source.mysql.initialsync;
66

7-
import com.fasterxml.jackson.databind.JsonNode;
87
import com.google.common.collect.AbstractIterator;
98
import com.mysql.cj.MysqlType;
109
import io.airbyte.cdk.db.JdbcCompatibleSourceOperations;
10+
import io.airbyte.cdk.db.jdbc.AirbyteRecordData;
1111
import io.airbyte.cdk.db.jdbc.JdbcDatabase;
1212
import io.airbyte.cdk.integrations.source.relationaldb.RelationalDbQueryUtils;
1313
import io.airbyte.commons.util.AutoCloseableIterator;
@@ -37,8 +37,8 @@
3737
* records processed here.
3838
*/
3939
@SuppressWarnings("try")
40-
public class MySqlInitialLoadRecordIterator extends AbstractIterator<JsonNode>
41-
implements AutoCloseableIterator<JsonNode> {
40+
public class MySqlInitialLoadRecordIterator extends AbstractIterator<AirbyteRecordData>
41+
implements AutoCloseableIterator<AirbyteRecordData> {
4242

4343
private static final Logger LOGGER = LoggerFactory.getLogger(MySqlInitialLoadRecordIterator.class);
4444

@@ -54,7 +54,7 @@ public class MySqlInitialLoadRecordIterator extends AbstractIterator<JsonNode>
5454
private final PrimaryKeyInfo pkInfo;
5555
private final boolean isCompositeKeyLoad;
5656
private int numSubqueries = 0;
57-
private AutoCloseableIterator<JsonNode> currentIterator;
57+
private AutoCloseableIterator<AirbyteRecordData> currentIterator;
5858

5959
MySqlInitialLoadRecordIterator(
6060
final JdbcDatabase database,
@@ -78,7 +78,7 @@ public class MySqlInitialLoadRecordIterator extends AbstractIterator<JsonNode>
7878

7979
@CheckForNull
8080
@Override
81-
protected JsonNode computeNext() {
81+
protected AirbyteRecordData computeNext() {
8282
if (shouldBuildNextSubquery()) {
8383
try {
8484
// We will only issue one query for a composite key load. If we have already processed all the data
@@ -93,8 +93,8 @@ protected JsonNode computeNext() {
9393
}
9494

9595
LOGGER.info("Subquery number : {}", numSubqueries);
96-
final Stream<JsonNode> stream = database.unsafeQuery(
97-
this::getPkPreparedStatement, sourceOperations::rowToJson);
96+
final Stream<AirbyteRecordData> stream = database.unsafeQuery(
97+
this::getPkPreparedStatement, sourceOperations::convertDatabaseRowToAirbyteRecordData);
9898

9999
currentIterator = AutoCloseableIterators.fromStream(stream, pair);
100100
numSubqueries++;

airbyte-integrations/connectors/source-mysql/src/test/java/io/airbyte/integrations/source/mysql/CdcMysqlSourceTest.java

+75
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import static org.junit.jupiter.api.Assertions.assertTrue;
2626

2727
import com.fasterxml.jackson.databind.JsonNode;
28+
import com.fasterxml.jackson.databind.ObjectMapper;
2829
import com.fasterxml.jackson.databind.node.ObjectNode;
2930
import com.google.common.collect.ImmutableList;
3031
import com.google.common.collect.ImmutableMap;
@@ -50,6 +51,10 @@
5051
import io.airbyte.protocol.models.v0.AirbyteGlobalState;
5152
import io.airbyte.protocol.models.v0.AirbyteMessage;
5253
import io.airbyte.protocol.models.v0.AirbyteRecordMessage;
54+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMeta;
55+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange;
56+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Change;
57+
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason;
5358
import io.airbyte.protocol.models.v0.AirbyteStateMessage;
5459
import io.airbyte.protocol.models.v0.AirbyteStateMessage.AirbyteStateType;
5560
import io.airbyte.protocol.models.v0.AirbyteStream;
@@ -59,6 +64,7 @@
5964
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
6065
import io.airbyte.protocol.models.v0.StreamDescriptor;
6166
import io.airbyte.protocol.models.v0.SyncMode;
67+
import java.util.ArrayList;
6268
import java.util.Collections;
6369
import java.util.HashSet;
6470
import java.util.List;
@@ -80,6 +86,11 @@ public class CdcMysqlSourceTest extends CdcSourceTest<MySqlSource, MySQLTestData
8086

8187
private static final Random RANDOM = new Random();
8288

89+
private static final String TEST_DATE_STREAM_NAME = "TEST_DATE_TABLE";
90+
private static final String COL_DATE_TIME = "CAR_DATE";
91+
private static final List<JsonNode> DATE_TIME_RECORDS = ImmutableList.of(
92+
Jsons.jsonNode(ImmutableMap.of(COL_ID, 120, COL_DATE_TIME, "'2023-00-00 20:37:47'")));
93+
8394
@Override
8495
protected MySQLTestDatabase createTestDatabase() {
8596
return MySQLTestDatabase.in(BaseImage.MYSQL_8, ContainerModifier.INVALID_TIMEZONE_CEST).withCdcPermissions();
@@ -734,6 +745,70 @@ public void testCompressedSchemaHistory() throws Exception {
734745
assertEquals(recordsToCreate, extractRecordMessages(dataFromSecondBatch).size());
735746
}
736747

748+
private void writeDateRecords(
749+
final JsonNode recordJson,
750+
final String dbName,
751+
final String streamName,
752+
final String idCol,
753+
final String dateCol) {
754+
testdb.with("INSERT INTO `%s` .`%s` (%s, %s) VALUES (%s, %s);", dbName, streamName,
755+
idCol, dateCol,
756+
recordJson.get(idCol).asInt(), recordJson.get(dateCol).asText());
757+
}
758+
759+
@Test
760+
public void testInvalidDatetime_metaChangesPopulated() throws Exception {
761+
final ConfiguredAirbyteCatalog configuredCatalog = Jsons.clone(getConfiguredCatalog());
762+
763+
// Add a datetime stream to the catalog
764+
testdb
765+
.withoutStrictMode()
766+
.with(createTableSqlFmt(), getDatabaseName(), TEST_DATE_STREAM_NAME,
767+
columnClause(ImmutableMap.of(COL_ID, "INTEGER", COL_DATE_TIME, "DATETIME"), Optional.of(COL_ID)));
768+
769+
for (final JsonNode recordJson : DATE_TIME_RECORDS) {
770+
writeDateRecords(recordJson, getDatabaseName(), TEST_DATE_STREAM_NAME, COL_ID, COL_DATE_TIME);
771+
}
772+
773+
final ConfiguredAirbyteStream airbyteStream = new ConfiguredAirbyteStream()
774+
.withStream(CatalogHelpers.createAirbyteStream(
775+
TEST_DATE_STREAM_NAME,
776+
getDatabaseName(),
777+
Field.of(COL_ID, JsonSchemaType.INTEGER),
778+
Field.of(COL_DATE_TIME, JsonSchemaType.STRING_TIMESTAMP_WITHOUT_TIMEZONE))
779+
.withSupportedSyncModes(
780+
Lists.newArrayList(SyncMode.FULL_REFRESH, SyncMode.INCREMENTAL))
781+
.withSourceDefinedPrimaryKey(List.of(List.of(COL_ID))));
782+
airbyteStream.setSyncMode(SyncMode.INCREMENTAL);
783+
784+
final List<ConfiguredAirbyteStream> streams = new ArrayList<>();
785+
streams.add(airbyteStream);
786+
configuredCatalog.withStreams(streams);
787+
788+
final AutoCloseableIterator<AirbyteMessage> read1 = source()
789+
.read(config(), configuredCatalog, null);
790+
final List<AirbyteMessage> actualRecords = AutoCloseableIterators.toListAndClose(read1);
791+
792+
// Sync is expected to succeed with one record. However, the meta changes column should be populated
793+
// for this record
794+
// as it is an invalid date. As a result, this field will be omitted as Airbyte is unable to
795+
// serialize the source value.
796+
final Set<AirbyteRecordMessage> recordMessages = extractRecordMessages(actualRecords);
797+
assertEquals(recordMessages.size(), 1);
798+
final AirbyteRecordMessage invalidDateRecord = recordMessages.stream().findFirst().get();
799+
800+
final AirbyteRecordMessageMetaChange expectedChange =
801+
new AirbyteRecordMessageMetaChange().withReason(Reason.SOURCE_SERIALIZATION_ERROR).withChange(
802+
Change.NULLED).withField(COL_DATE_TIME);
803+
final AirbyteRecordMessageMeta expectedMessageMeta = new AirbyteRecordMessageMeta().withChanges(List.of(expectedChange));
804+
assertEquals(expectedMessageMeta, invalidDateRecord.getMeta());
805+
806+
ObjectMapper mapper = new ObjectMapper();
807+
final JsonNode expectedDataWithoutCdcFields = mapper.readTree("{\"id\":120}");
808+
removeCDCColumns((ObjectNode) invalidDateRecord.getData());
809+
assertEquals(expectedDataWithoutCdcFields, invalidDateRecord.getData());
810+
}
811+
737812
private void createTablesToIncreaseSchemaHistorySize() {
738813
for (int i = 0; i <= 200; i++) {
739814
final String tableName = generateRandomStringOf32Characters();

docs/integrations/sources/mysql.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,8 @@ Any database or table encoding combination of charset and collation is supported
223223

224224
| Version | Date | Pull Request | Subject |
225225
|:--------|:-----------|:-----------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------|
226-
| 3.3.18 | 2024-04-15 | [36919](https://github.com/airbytehq/airbyte/pull/36919) | Refactor source operations. |
226+
| 3.3.19 | 2024-04-15 | [37328](https://github.com/airbytehq/airbyte/pull/37328) | Populate airbyte_meta.changes |
227+
| 3.3.18 | 2024-04-15 | [37324](https://github.com/airbytehq/airbyte/pull/37324) | Refactor source operations. |
227228
| 3.3.17 | 2024-04-10 | [36919](https://github.com/airbytehq/airbyte/pull/36919) | Fix a bug in conversion of null values. |
228229
| 3.3.16 | 2024-04-05 | [36872](https://github.com/airbytehq/airbyte/pull/36872) | Update to connector's metadata definition. |
229230
| 3.3.15 | 2024-04-05 | [36577](https://github.com/airbytehq/airbyte/pull/36577) | Config error will not send out system trace message |

0 commit comments

Comments
 (0)