Skip to content

Commit ddc3d2b

Browse files
authored
[Source-mongo] : Add options for CAPTURE_MODE (#36851)
1 parent d74125b commit ddc3d2b

File tree

9 files changed

+122
-7
lines changed

9 files changed

+122
-7
lines changed

airbyte-integrations/connectors/source-mongodb-v2/build.gradle

+4-4
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,8 @@ java {
3838
}
3939

4040
dependencies {
41-
implementation 'io.debezium:debezium-embedded:2.4.0.Final'
42-
implementation 'io.debezium:debezium-connector-mongodb:2.4.0.Final'
41+
implementation 'io.debezium:debezium-embedded:2.5.1.Final'
42+
implementation 'io.debezium:debezium-connector-mongodb:2.5.1.Final'
4343

4444
testImplementation 'org.testcontainers:mongodb:1.19.0'
4545

@@ -53,8 +53,8 @@ dependencies {
5353
dataGeneratorImplementation 'org.jetbrains.kotlinx:kotlinx-cli-jvm:0.3.5'
5454
dataGeneratorImplementation 'org.mongodb:mongodb-driver-sync:4.10.2'
5555

56-
debeziumTestImplementation 'io.debezium:debezium-embedded:2.4.0.Final'
57-
debeziumTestImplementation 'io.debezium:debezium-connector-mongodb:2.4.0.Final'
56+
debeziumTestImplementation 'io.debezium:debezium-embedded:2.5.1.Final'
57+
debeziumTestImplementation 'io.debezium:debezium-connector-mongodb:2.5.1.Final'
5858
debeziumTestImplementation 'org.jetbrains.kotlinx:kotlinx-cli-jvm:0.3.5'
5959
debeziumTestImplementation 'com.github.spotbugs:spotbugs-annotations:4.7.3'
6060
}

airbyte-integrations/connectors/source-mongodb-v2/integration_tests/expected_spec.json

+9
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,15 @@
176176
"default": "Fail sync",
177177
"order": 11,
178178
"group": "advanced"
179+
},
180+
"update_capture_mode": {
181+
"type": "string",
182+
"title": "Capture mode (Advanced)",
183+
"description": "Determines how Airbyte looks up the value of an updated document. If 'Lookup' is chosen, the current value of the document will be read. If 'Post Image' is chosen, then the version of the document immediately after an update will be read. WARNING : Severe data loss will occur if this option is chosen and the appropriate settings are not set on your Mongo instance : https://www.mongodb.com/docs/manual/changeStreams/#change-streams-with-document-pre-and-post-images.",
184+
"enum": ["Lookup", "Post Image"],
185+
"default": "Lookup",
186+
"order": 12,
187+
"group": "advanced"
179188
}
180189
},
181190
"groups": [

airbyte-integrations/connectors/source-mongodb-v2/metadata.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ data:
88
connectorSubtype: database
99
connectorType: source
1010
definitionId: b2e713cd-cc36-4c0a-b5bd-b47cb8a0561e
11-
dockerImageTag: 1.3.11
11+
dockerImageTag: 1.3.12
1212
dockerRepository: airbyte/source-mongodb-v2
1313
documentationUrl: https://docs.airbyte.com/integrations/sources/mongodb-v2
1414
githubIssueLabel: source-mongodb-v2

airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io/airbyte/integrations/source/mongodb/MongoConstants.java

+5
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,11 @@ public class MongoConstants {
4141
public static final String FAIL_SYNC_OPTION = "Fail sync";
4242
public static final String RESYNC_DATA_OPTION = "Re-sync data";
4343

44+
public static final String UPDATE_CAPTURE_MODE = "update_capture_mode";
45+
46+
public static final String CAPTURE_MODE_LOOKUP_OPTION = "Lookup";
47+
public static final String CAPTURE_MODE_POST_IMAGE_OPTION = "Post Image";
48+
4449
private MongoConstants() {}
4550

4651
}

airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io/airbyte/integrations/source/mongodb/MongoDbSourceConfig.java

+21-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
package io.airbyte.integrations.source.mongodb;
66

77
import static io.airbyte.integrations.source.mongodb.MongoConstants.AUTH_SOURCE_CONFIGURATION_KEY;
8+
import static io.airbyte.integrations.source.mongodb.MongoConstants.CAPTURE_MODE_LOOKUP_OPTION;
89
import static io.airbyte.integrations.source.mongodb.MongoConstants.CHECKPOINT_INTERVAL;
910
import static io.airbyte.integrations.source.mongodb.MongoConstants.CHECKPOINT_INTERVAL_CONFIGURATION_KEY;
1011
import static io.airbyte.integrations.source.mongodb.MongoConstants.DATABASE_CONFIGURATION_KEY;
@@ -18,9 +19,11 @@
1819
import static io.airbyte.integrations.source.mongodb.MongoConstants.PASSWORD_CONFIGURATION_KEY;
1920
import static io.airbyte.integrations.source.mongodb.MongoConstants.RESYNC_DATA_OPTION;
2021
import static io.airbyte.integrations.source.mongodb.MongoConstants.SCHEMA_ENFORCED_CONFIGURATION_KEY;
22+
import static io.airbyte.integrations.source.mongodb.MongoConstants.UPDATE_CAPTURE_MODE;
2123
import static io.airbyte.integrations.source.mongodb.MongoConstants.USERNAME_CONFIGURATION_KEY;
2224

2325
import com.fasterxml.jackson.databind.JsonNode;
26+
import com.fasterxml.jackson.databind.node.ObjectNode;
2427
import java.util.OptionalInt;
2528

2629
/**
@@ -41,7 +44,12 @@ public record MongoDbSourceConfig(JsonNode rawConfig) {
4144
}
4245

4346
public JsonNode getDatabaseConfig() {
44-
return rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY);
47+
JsonNode rawDbConfigNode = rawConfig.get(DATABASE_CONFIG_CONFIGURATION_KEY);
48+
// Add other properties to the raw db config. Unfortunately, due to the setup of the config json,
49+
// other connection properties need to
50+
// be added to this config.
51+
addAdvancedPropertiesToDatabaseConfig(rawDbConfigNode);
52+
return rawDbConfigNode;
4553
}
4654

4755
public String getAuthSource() {
@@ -107,4 +115,16 @@ public boolean shouldFailSyncOnInvalidCursor() {
107115
}
108116
}
109117

118+
public String getUpdateCaptureMode() {
119+
if (rawConfig.has(UPDATE_CAPTURE_MODE)) {
120+
return rawConfig.get(UPDATE_CAPTURE_MODE).asText();
121+
} else {
122+
return CAPTURE_MODE_LOOKUP_OPTION;
123+
}
124+
}
125+
126+
private void addAdvancedPropertiesToDatabaseConfig(JsonNode dbConfig) {
127+
((ObjectNode) dbConfig).put(UPDATE_CAPTURE_MODE, getUpdateCaptureMode());
128+
}
129+
110130
}

airbyte-integrations/connectors/source-mongodb-v2/src/main/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManager.java

+8
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
package io.airbyte.integrations.source.mongodb.cdc;
66

7+
import static io.airbyte.integrations.source.mongodb.MongoConstants.CAPTURE_MODE_POST_IMAGE_OPTION;
8+
import static io.airbyte.integrations.source.mongodb.MongoConstants.UPDATE_CAPTURE_MODE;
79
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.AUTH_SOURCE_CONFIGURATION_KEY;
810
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CONNECTION_STRING_CONFIGURATION_KEY;
911
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumConstants.Configuration.CREDENTIALS_PLACEHOLDER;
@@ -31,6 +33,9 @@ public class MongoDbDebeziumPropertiesManager extends DebeziumPropertiesManager
3133

3234
static final String COLLECTION_INCLUDE_LIST_KEY = "collection.include.list";
3335
static final String DATABASE_INCLUDE_LIST_KEY = "database.include.list";
36+
37+
static final String MONGODB_POST_IMAGE_KEY = "capture.mode.full.update.type";
38+
static final String MONGODB_POST_IMAGE_VALUE = "post_image";
3439
static final String CAPTURE_TARGET_KEY = "capture.target";
3540
static final String DOUBLE_QUOTES_PATTERN = "\"";
3641
static final String MONGODB_AUTHSOURCE_KEY = "mongodb.authsource";
@@ -65,6 +70,9 @@ protected Properties getConnectionConfiguration(final JsonNode config) {
6570
properties.setProperty(MONGODB_AUTHSOURCE_KEY, config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText());
6671
}
6772
properties.setProperty(MONGODB_SSL_ENABLED_KEY, MONGODB_SSL_ENABLED_VALUE);
73+
if (config.has(UPDATE_CAPTURE_MODE) && config.get(UPDATE_CAPTURE_MODE).asText().equals(CAPTURE_MODE_POST_IMAGE_OPTION)) {
74+
properties.setProperty(MONGODB_POST_IMAGE_KEY, MONGODB_POST_IMAGE_VALUE);
75+
}
6876
return properties;
6977
}
7078

airbyte-integrations/connectors/source-mongodb-v2/src/main/resources/spec.json

+9
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,15 @@
176176
"default": "Fail sync",
177177
"order": 11,
178178
"group": "advanced"
179+
},
180+
"update_capture_mode": {
181+
"type": "string",
182+
"title": "Capture mode (Advanced)",
183+
"description": "Determines how Airbyte looks up the value of an updated document. If 'Lookup' is chosen, the current value of the document will be read. If 'Post Image' is chosen, then the version of the document immediately after an update will be read. WARNING : Severe data loss will occur if this option is chosen and the appropriate settings are not set on your Mongo instance : https://www.mongodb.com/docs/manual/changeStreams/#change-streams-with-document-pre-and-post-images.",
184+
"enum": ["Lookup", "Post Image"],
185+
"default": "Lookup",
186+
"order": 12,
187+
"group": "advanced"
179188
}
180189
},
181190
"groups": [

airbyte-integrations/connectors/source-mongodb-v2/src/test/java/io/airbyte/integrations/source/mongodb/cdc/MongoDbDebeziumPropertiesManagerTest.java

+62
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_CONNECTION_MODE_VALUE;
2020
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_CONNECTION_STRING_KEY;
2121
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_PASSWORD_KEY;
22+
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_POST_IMAGE_KEY;
23+
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_POST_IMAGE_VALUE;
2224
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_SSL_ENABLED_KEY;
2325
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_SSL_ENABLED_VALUE;
2426
import static io.airbyte.integrations.source.mongodb.cdc.MongoDbDebeziumPropertiesManager.MONGODB_USER_KEY;
@@ -34,6 +36,7 @@
3436
import io.airbyte.cdk.integrations.debezium.internals.AirbyteFileOffsetBackingStore;
3537
import io.airbyte.cdk.integrations.debezium.internals.AirbyteSchemaHistoryStorage;
3638
import io.airbyte.commons.json.Jsons;
39+
import io.airbyte.integrations.source.mongodb.MongoConstants;
3740
import io.airbyte.protocol.models.v0.AirbyteStream;
3841
import io.airbyte.protocol.models.v0.ConfiguredAirbyteCatalog;
3942
import io.airbyte.protocol.models.v0.ConfiguredAirbyteStream;
@@ -80,6 +83,65 @@ void testDebeziumProperties() {
8083
assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY));
8184
}
8285

86+
@Test
87+
void testDebeziumProperties_captureMode_lookup() {
88+
final List<ConfiguredAirbyteStream> streams = createStreams(4);
89+
final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class);
90+
final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class);
91+
JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin"));
92+
((ObjectNode) config).put(MongoConstants.UPDATE_CAPTURE_MODE, MongoConstants.CAPTURE_MODE_LOOKUP_OPTION);
93+
94+
when(catalog.getStreams()).thenReturn(streams);
95+
96+
final Properties cdcProperties = new Properties();
97+
cdcProperties.put("test", "value");
98+
99+
final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog);
100+
101+
final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager);
102+
assertEquals(21 + cdcProperties.size(), debeziumProperties.size());
103+
assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY));
104+
assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY));
105+
assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY));
106+
assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY));
107+
assertEquals(config.get(USERNAME_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_USER_KEY));
108+
assertEquals(config.get(PASSWORD_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_PASSWORD_KEY));
109+
assertEquals(config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_AUTHSOURCE_KEY));
110+
assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY));
111+
assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY));
112+
assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY));
113+
}
114+
115+
@Test
116+
void testDebeziumProperties_captureMode_postImage() {
117+
final List<ConfiguredAirbyteStream> streams = createStreams(4);
118+
final AirbyteFileOffsetBackingStore offsetManager = mock(AirbyteFileOffsetBackingStore.class);
119+
final ConfiguredAirbyteCatalog catalog = mock(ConfiguredAirbyteCatalog.class);
120+
JsonNode config = createConfiguration(Optional.of("username"), Optional.of("password"), Optional.of("admin"));
121+
((ObjectNode) config).put(MongoConstants.UPDATE_CAPTURE_MODE, MongoConstants.CAPTURE_MODE_POST_IMAGE_OPTION);
122+
123+
when(catalog.getStreams()).thenReturn(streams);
124+
125+
final Properties cdcProperties = new Properties();
126+
cdcProperties.put("test", "value");
127+
128+
final var debeziumPropertiesManager = new MongoDbDebeziumPropertiesManager(cdcProperties, config, catalog);
129+
130+
final Properties debeziumProperties = debeziumPropertiesManager.getDebeziumProperties(offsetManager);
131+
assertEquals(22 + cdcProperties.size(), debeziumProperties.size());
132+
assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(NAME_KEY));
133+
assertEquals(MongoDbDebeziumPropertiesManager.normalizeName(DATABASE_NAME), debeziumProperties.get(TOPIC_PREFIX_KEY));
134+
assertEquals(EXPECTED_CONNECTION_STRING, debeziumProperties.get(MONGODB_CONNECTION_STRING_KEY));
135+
assertEquals(MONGODB_CONNECTION_MODE_VALUE, debeziumProperties.get(MONGODB_CONNECTION_MODE_KEY));
136+
assertEquals(config.get(USERNAME_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_USER_KEY));
137+
assertEquals(config.get(PASSWORD_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_PASSWORD_KEY));
138+
assertEquals(config.get(AUTH_SOURCE_CONFIGURATION_KEY).asText(), debeziumProperties.get(MONGODB_AUTHSOURCE_KEY));
139+
assertEquals(MONGODB_SSL_ENABLED_VALUE, debeziumProperties.get(MONGODB_SSL_ENABLED_KEY));
140+
assertEquals(debeziumPropertiesManager.createCollectionIncludeString(streams), debeziumProperties.get(COLLECTION_INCLUDE_LIST_KEY));
141+
assertEquals(DATABASE_NAME, debeziumProperties.get(DATABASE_INCLUDE_LIST_KEY));
142+
assertEquals(MONGODB_POST_IMAGE_VALUE, debeziumProperties.get(MONGODB_POST_IMAGE_KEY));
143+
}
144+
83145
@Test
84146
void testDebeziumPropertiesConnectionStringCredentialsPlaceholder() {
85147
final List<ConfiguredAirbyteStream> streams = createStreams(4);

docs/integrations/sources/mongodb-v2.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -210,17 +210,19 @@ When importing a large MongoDB collection for the first time, the import duratio
210210
| Username | The username which is used to access the database. Required for MongoDB Atlas clusters. |
211211
| Password | The password associated with this username. Required for MongoDB Atlas clusters. |
212212
| Authentication Source | (MongoDB Atlas clusters only) Specifies the database that the supplied credentials should be validated against. Defaults to `admin`. See the [MongoDB documentation](https://www.mongodb.com/docs/manual/reference/connection-string/#mongodb-urioption-urioption.authSource) for more details. |
213-
| Schema Enforced | Controls whether schema is discovered and enforced. See discussion in [Schema Enforcement](#Schema-Enforcement). |
213+
| Schema Enforced | Controls whether schema is discovered and enforced. See discussion in [Schema Enforcement](#Schema-Enforcement). |
214214
| Initial Waiting Time in Seconds (Advanced) | The amount of time the connector will wait when it launches to determine if there is new data to sync or not. Defaults to 300 seconds. Valid range: 120 seconds to 1200 seconds. |
215215
| Size of the queue (Advanced) | The size of the internal queue. This may interfere with memory consumption and efficiency of the connector, please be careful. |
216216
| Discovery Sample Size (Advanced) | The maximum number of documents to sample when attempting to discover the unique fields for a collection. Default is 10,000 with a valid range of 1,000 to 100,000. See the [MongoDB sampling method](https://www.mongodb.com/docs/compass/current/sampling/#sampling-method) for more details. |
217+
| Capture mode (Advanced) | Determines how Airbyte looks up the value of an updated document. Default is "Lookup". **IMPORTANT**: "Post Image" is only supported in MongoDB version 6.0+. In addition, the collections of interest must be set up to [return pre and post images](https://www.mongodb.com/docs/manual/changeStreams/#change-streams-with-document-pre-and-post-images). Failure to do so will lead to data loss. |
217218

218219
For more information regarding configuration parameters, please see [MongoDb Documentation](https://docs.mongodb.com/drivers/java/sync/v4.10/fundamentals/connection/).
219220

220221
## Changelog
221222

222223
| Version | Date | Pull Request | Subject |
223224
|:--------|:-----------|:---------------------------------------------------------|:----------------------------------------------------------------------------------------------------------|
225+
| 1.3.12 | 2024-05-07 | [36851](https://github.com/airbytehq/airbyte/pull/36851) | Add an update capture mode option (Lookup / Post Image). Upgrade debezium to version 2.5.1. |
224226
| 1.3.11 | 2024-05-02 | [37753](https://github.com/airbytehq/airbyte/pull/37753) | Chunk size(limit) should correspond to ~1GB of data. |
225227
| 1.3.10 | 2024-05-02 | [37781](https://github.com/airbytehq/airbyte/pull/37781) | Adopt latest CDK. |
226228
| 1.3.9 | 2024-05-01 | [37742](https://github.com/airbytehq/airbyte/pull/37742) | Adopt latest CDK. Remove Debezium retries. |

0 commit comments

Comments
 (0)