|
16 | 16 | import io.airbyte.integrations.destination.jdbc.copy.StreamCopier;
|
17 | 17 | import io.airbyte.integrations.destination.s3.S3Destination;
|
18 | 18 | import io.airbyte.integrations.destination.s3.S3DestinationConfig;
|
| 19 | +import io.airbyte.integrations.destination.s3.csv.S3CsvFormatConfig; |
| 20 | +import io.airbyte.integrations.destination.s3.csv.S3CsvFormatConfig.Flattening; |
| 21 | +import io.airbyte.integrations.destination.s3.csv.S3CsvWriter; |
19 | 22 | import io.airbyte.protocol.models.AirbyteRecordMessage;
|
| 23 | +import io.airbyte.protocol.models.ConfiguredAirbyteStream; |
20 | 24 | import io.airbyte.protocol.models.DestinationSyncMode;
|
21 | 25 | import java.io.IOException;
|
22 | 26 | import java.io.PrintWriter;
|
@@ -58,45 +62,66 @@ public abstract class S3StreamCopier implements StreamCopier {
|
// Target warehouse connection used to load data from the staging area.
protected final JdbcDatabase db;
// Normalizes stream/table identifiers for the destination's naming rules.
private final ExtendedNameTransformer nameTransformer;
// Destination-specific SQL (create/copy/insert) operations.
private final SqlOperations sqlOperations;
// Catalog entry this copier serves; supplies stream name and sync mode.
private final ConfiguredAirbyteStream configuredAirbyteStream;
// Timestamp applied to staged objects so a sync's files share one prefix.
private final Timestamp uploadTime;
// S3 keys of staging files created during this sync (used for cleanup/COPY).
protected final Set<String> s3StagingFiles = new HashSet<>();
// Per-staging-file multipart upload managers (legacy streaming path).
private final Map<String, StreamTransferManager> multipartUploadManagers = new HashMap<>();
// Output stream backing each in-flight multipart upload.
private final Map<String, MultiPartOutputStream> outputStreams = new HashMap<>();
// CSV printer writing records into each staging file's stream.
private final Map<String, CSVPrinter> csvPrinters = new HashMap<>();
// Root folder (key prefix) under which this sync stages its files.
protected final String stagingFolder;
// Produces sequential staging filenames, rolling over after MAX_PARTS_PER_FILE parts.
private final StagingFilenameGenerator filenameGenerator;
// New S3CsvWriter-based staging writers, keyed by staging file name.
private final Map<String, S3CsvWriter> stagingWriters = new HashMap<>();
|
68 | 75 | public S3StreamCopier(final String stagingFolder,
|
69 |
| - final DestinationSyncMode destSyncMode, |
70 | 76 | final String schema,
|
71 |
| - final String streamName, |
72 | 77 | final AmazonS3 client,
|
73 | 78 | final JdbcDatabase db,
|
74 | 79 | final S3DestinationConfig s3Config,
|
75 | 80 | final ExtendedNameTransformer nameTransformer,
|
76 |
| - final SqlOperations sqlOperations) { |
77 |
| - this.destSyncMode = destSyncMode; |
| 81 | + final SqlOperations sqlOperations, |
| 82 | + final ConfiguredAirbyteStream configuredAirbyteStream, |
| 83 | + final Timestamp uploadTime) { |
| 84 | + this.destSyncMode = configuredAirbyteStream.getDestinationSyncMode(); |
78 | 85 | this.schemaName = schema;
|
79 |
| - this.streamName = streamName; |
| 86 | + this.streamName = configuredAirbyteStream.getStream().getName(); |
80 | 87 | this.stagingFolder = stagingFolder;
|
81 | 88 | this.db = db;
|
82 | 89 | this.nameTransformer = nameTransformer;
|
83 | 90 | this.sqlOperations = sqlOperations;
|
84 |
| - this.tmpTableName = nameTransformer.getTmpTableName(streamName); |
| 91 | + this.configuredAirbyteStream = configuredAirbyteStream; |
| 92 | + this.uploadTime = uploadTime; |
| 93 | + this.tmpTableName = nameTransformer.getTmpTableName(this.streamName); |
85 | 94 | this.s3Client = client;
|
86 | 95 | this.s3Config = s3Config;
|
87 |
| - this.filenameGenerator = new StagingFilenameGenerator(streamName, MAX_PARTS_PER_FILE); |
| 96 | + this.filenameGenerator = new StagingFilenameGenerator(this.streamName, MAX_PARTS_PER_FILE); |
88 | 97 | }
|
89 | 98 |
|
90 | 99 | private String prepareS3StagingFile() {
|
91 | 100 | return String.join("/", stagingFolder, schemaName, filenameGenerator.getStagingFilename());
|
92 | 101 | }
|
93 | 102 |
|
| 103 | + /* |
| 104 | + * old behavior: create s3://bucket/randomUuid/(namespace|schemaName)/generatedFilename |
| 105 | + * S3CsvWriter: create s3://bucket/bucketPath(/namespace)?/streamName/time.csv |
| 106 | + */ |
94 | 107 | @Override
|
95 | 108 | public String prepareStagingFile() {
|
96 | 109 | final var name = prepareS3StagingFile();
|
97 |
| - if (!s3StagingFiles.contains(name)) { |
98 |
| - s3StagingFiles.add(name); |
| 110 | + if (!stagingWriters.containsKey(name)) { |
99 | 111 | LOGGER.info("S3 upload part size: {} MB", s3Config.getPartSize());
|
| 112 | + |
| 113 | + try { |
| 114 | + final S3CsvWriter writer = new S3CsvWriter( |
| 115 | + s3Config.cloneWithFormatConfig(new S3CsvFormatConfig(Flattening.ROOT_LEVEL, (long) s3Config.getPartSize())), |
| 116 | + s3Client, |
| 117 | + configuredAirbyteStream, |
| 118 | + uploadTime |
| 119 | + ); |
| 120 | + stagingWriters.put(name, writer); |
| 121 | + } catch (final IOException e) { |
| 122 | + throw new RuntimeException(e); |
| 123 | + } |
| 124 | + |
100 | 125 | // The stream transfer manager lets us greedily stream into S3. The native AWS SDK does not
|
101 | 126 | // have support for streaming multipart uploads;
|
102 | 127 | // The alternative is first writing the entire output to disk before loading into S3. This is not
|
|
0 commit comments