Skip to content

🐛 Destination Postgres: fix \u0000(NULL) value processing #5336

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Aug 30, 2021
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,6 @@ plugins {
id 'airbyte-integration-test-java'
}

application {
mainClass = 'io.airbyte.integrations.destination.jdbc.JdbcDestination'
}

dependencies {
implementation 'com.google.cloud:google-cloud-storage:1.113.16'
implementation 'com.google.auth:google-auth-library-oauth2-http:0.25.5'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,28 +29,22 @@
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.time.Instant;
import java.util.List;
import java.util.UUID;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVPrinter;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class DefaultSqlOperations implements SqlOperations {
public abstract class JdbcSqlOperations implements SqlOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(DefaultSqlOperations.class);
private static final Logger LOGGER = LoggerFactory.getLogger(JdbcSqlOperations.class);

@Override
public void createSchemaIfNotExists(JdbcDatabase database, String schemaName) throws Exception {
Expand All @@ -77,38 +71,8 @@ public String createTableQuery(JdbcDatabase database, String schemaName, String
schemaName, tableName, JavaBaseConstants.COLUMN_NAME_AB_ID, JavaBaseConstants.COLUMN_NAME_DATA, JavaBaseConstants.COLUMN_NAME_EMITTED_AT);
}

/**
 * Bulk-loads {@code records} into {@code schemaName.tmpTableName} with the Postgres
 * {@code COPY ... FROM stdin} command.
 *
 * <p>The batch is first written to a temporary file via {@code writeBatchToFile}, then streamed to
 * the server through the driver's {@link CopyManager}. The temporary file is removed afterwards,
 * whether or not the copy succeeded.
 *
 * @param database database handle used to obtain the JDBC connection
 * @param records records to insert; an empty list is a no-op
 * @param schemaName schema containing the target table
 * @param tmpTableName target table; also used as the temp-file name prefix
 * @throws SQLException declared by the interface; failures inside the copy are rethrown wrapped in
 *         a {@link RuntimeException}
 */
@Override
public void insertRecords(JdbcDatabase database, List<AirbyteRecordMessage> records, String schemaName, String tmpTableName) throws SQLException {
  if (records.isEmpty()) {
    return;
  }

  // todo (cgardens) - move this into a postgres version of this. this syntax is postgres-specific
  database.execute(connection -> {
    File tmpFile = null;
    try {
      tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile();
      writeBatchToFile(tmpFile, records);

      var copyManager = new CopyManager(connection.unwrap(BaseConnection.class));
      var sql = String.format("COPY %s.%s FROM stdin DELIMITER ',' CSV", schemaName, tmpTableName);
      // Files.newBufferedReader(Path) decodes as UTF-8, matching the charset the batch file is
      // written with, instead of FileReader's platform-default charset. try-with-resources closes
      // the reader so the file handle is released before the temp file is deleted below.
      try (var bufferedReader = Files.newBufferedReader(tmpFile.toPath())) {
        copyManager.copyIn(sql, bufferedReader);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      try {
        if (tmpFile != null) {
          Files.delete(tmpFile.toPath());
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  });
}

protected void writeBatchToFile(File tmpFile, List<AirbyteRecordMessage> records) throws Exception {
LOGGER.warn("writeBatchToFile : " + records);
PrintWriter writer = null;
try {
writer = new PrintWriter(tmpFile, StandardCharsets.UTF_8);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ void testInsertRawRecordsInSingleQuery() throws SQLException {
final UUID RECORD2_UUID = UUID.randomUUID();
when(uuidSupplier.get()).thenReturn(RECORD1_UUID).thenReturn(RECORD2_UUID);

new DefaultSqlOperations().createTableIfNotExists(database, SCHEMA_NAME, STREAM_NAME);
new TestJdbcSqlOperations().createTableIfNotExists(database, SCHEMA_NAME, STREAM_NAME);

final String insertQueryComponent = String.format(
"INSERT INTO %s.%s (%s, %s, %s) VALUES\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,32 +24,19 @@

package io.airbyte.integrations.destination.jdbc;

import com.fasterxml.jackson.databind.JsonNode;
import io.airbyte.integrations.base.Destination;
import io.airbyte.integrations.base.IntegrationRunner;
import io.airbyte.integrations.destination.ExtendedNameTransformer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.util.List;

public class JdbcDestination extends AbstractJdbcDestination implements Destination {
public class TestJdbcSqlOperations extends JdbcSqlOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(JdbcDestination.class);

public JdbcDestination() {
super("org.postgresql.Driver", new ExtendedNameTransformer(), new DefaultSqlOperations());
}

// no-op for JdbcIntegration since the config it receives is designed to be use for JDBC.
@Override
public JsonNode toJdbcConfig(JsonNode config) {
return config;
}

public static void main(String[] args) throws Exception {
final Destination destination = new JdbcDestination();
LOGGER.info("starting destination: {}", JdbcDestination.class);
new IntegrationRunner(destination).run(args);
LOGGER.info("completed destination: {}", JdbcDestination.class);
public void insertRecords(JdbcDatabase database,
List<AirbyteRecordMessage> records,
String schemaName,
String tableName)
throws Exception {
// Not required for the testing
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.StandardNameTransformer;
import io.airbyte.integrations.destination.jdbc.DefaultSqlOperations;
import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.io.File;
import java.io.IOException;
Expand All @@ -38,7 +38,7 @@
import java.util.List;
import java.util.stream.Collectors;

public class MySQLSqlOperations extends DefaultSqlOperations {
public class MySQLSqlOperations extends JdbcSqlOperations {

private boolean isLocalFileEnabled = false;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ COPY build/distributions/${APPLICATION}*.tar ${APPLICATION}.tar

RUN tar xf ${APPLICATION}.tar --strip-components=1

LABEL io.airbyte.version=0.3.9
LABEL io.airbyte.version=0.3.10
LABEL io.airbyte.name=airbyte/destination-postgres
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@
import io.airbyte.integrations.base.Destination;
import io.airbyte.integrations.base.IntegrationRunner;
import io.airbyte.integrations.destination.jdbc.AbstractJdbcDestination;
import io.airbyte.integrations.destination.jdbc.DefaultSqlOperations;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
Expand All @@ -44,7 +43,7 @@ public class PostgresDestination extends AbstractJdbcDestination implements Dest
public static final String DRIVER_CLASS = "org.postgresql.Driver";

public PostgresDestination() {
super(DRIVER_CLASS, new PostgresSQLNameTransformer(), new DefaultSqlOperations());
super(DRIVER_CLASS, new PostgresSQLNameTransformer(), new PostgresSqlOperations());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* MIT License
*
* Copyright (c) 2020 Airbyte
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

package io.airbyte.integrations.destination.postgres;

import io.airbyte.commons.json.Jsons;
import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations;
import io.airbyte.protocol.models.AirbyteRecordMessage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Files;
import java.sql.SQLException;
import java.util.List;
import org.postgresql.copy.CopyManager;
import org.postgresql.core.BaseConnection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class PostgresSqlOperations extends JdbcSqlOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(PostgresSqlOperations.class);

/**
 * Bulk-loads {@code records} into {@code schemaName.tmpTableName} with the Postgres
 * {@code COPY ... FROM stdin} command.
 *
 * <p>The records are passed through {@code formatRecords} (which strips {@code \u0000} escape
 * sequences from the record data, mutating the records in place), written to a temporary file via
 * {@code writeBatchToFile}, and then streamed to the server through the driver's
 * {@link CopyManager}. The temporary file is removed afterwards, whether or not the copy
 * succeeded.
 *
 * @param database database handle used to obtain the JDBC connection
 * @param records records to insert; an empty list is a no-op
 * @param schemaName schema containing the target table
 * @param tmpTableName target table; also used as the temp-file name prefix
 * @throws SQLException declared by the interface; failures inside the copy are rethrown wrapped in
 *         a {@link RuntimeException}
 */
@Override
public void insertRecords(JdbcDatabase database, List<AirbyteRecordMessage> records, String schemaName, String tmpTableName) throws SQLException {
  if (records.isEmpty()) {
    return;
  }

  database.execute(connection -> {
    File tmpFile = null;
    try {
      tmpFile = Files.createTempFile(tmpTableName + "-", ".tmp").toFile();
      writeBatchToFile(tmpFile, formatRecords(records));

      var copyManager = new CopyManager(connection.unwrap(BaseConnection.class));
      var sql = String.format("COPY %s.%s FROM stdin DELIMITER ',' CSV", schemaName, tmpTableName);
      // Files.newBufferedReader(Path) decodes as UTF-8, matching the charset the batch file is
      // written with, instead of FileReader's platform-default charset. try-with-resources closes
      // the reader so the file handle is released before the temp file is deleted below.
      try (var bufferedReader = Files.newBufferedReader(tmpFile.toPath())) {
        copyManager.copyIn(sql, bufferedReader);
      }
    } catch (Exception e) {
      throw new RuntimeException(e);
    } finally {
      try {
        if (tmpFile != null) {
          Files.delete(tmpFile.toPath());
        }
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    }
  });
}

private List<AirbyteRecordMessage> formatRecords(List<AirbyteRecordMessage> records) {
// Postgres fails if json contains \u0000 unicode (NULL) in a json.
records.forEach(airbyteRecordMessage -> airbyteRecordMessage
.setData(Jsons.deserialize(Jsons.serialize(airbyteRecordMessage.getData()).replaceAll("\\\\u0000", ""))));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks fine, but my concern is that doing `Jsons.deserialize(Jsons.serialize(...))` for each record here is going to have a performance impact. How about we move this to BufferedStreamConsumer? We already have a string conversion there, so it would save us from doing the serialization twice.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point! Thanks ;)

return records;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.jdbc.DefaultSqlOperations;
import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.jdbc.SqlOperationsUtils;
import io.airbyte.protocol.models.AirbyteRecordMessage;
Expand All @@ -35,7 +35,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class RedshiftSqlOperations extends DefaultSqlOperations implements SqlOperations {
public class RedshiftSqlOperations extends JdbcSqlOperations implements SqlOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(RedshiftSqlOperations.class);
protected static final int REDSHIFT_VARCHAR_MAX_BYTE_SIZE = 65535;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

import io.airbyte.db.jdbc.JdbcDatabase;
import io.airbyte.integrations.base.JavaBaseConstants;
import io.airbyte.integrations.destination.jdbc.DefaultSqlOperations;
import io.airbyte.integrations.destination.jdbc.JdbcSqlOperations;
import io.airbyte.integrations.destination.jdbc.SqlOperations;
import io.airbyte.integrations.destination.jdbc.SqlOperationsUtils;
import io.airbyte.protocol.models.AirbyteRecordMessage;
Expand All @@ -35,7 +35,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

class SnowflakeSqlOperations extends DefaultSqlOperations implements SqlOperations {
class SnowflakeSqlOperations extends JdbcSqlOperations implements SqlOperations {

private static final Logger LOGGER = LoggerFactory.getLogger(SnowflakeSqlOperations.class);

Expand Down
4 changes: 4 additions & 0 deletions docs/integrations/destinations/postgres.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,7 @@ From [Postgres SQL Identifiers syntax](https://www.postgresql.org/docs/9.0/sql-s

Therefore, the Airbyte Postgres destination will create tables and schemas using unquoted identifiers when possible, or fall back to quoted identifiers if the names contain special characters.

## Changelog
| Version | Date | Pull Request | Subject |
| :--- | :--- | :--- | :--- |
| 0.3.10 | 2021-08-11 | [#5336](https://github.com/airbytehq/airbyte/pull/5336) | 🐛 Destination Postgres: fix \u0000(NULL) value processing |