Commit 84a9831

feat(connector): unify and simplify path config of minio and s3 (risingwavelabs#8508)
1 parent 0e93998 commit 84a9831

File tree: 6 files changed, +100 −47 lines

.gitignore

Lines changed: 2 additions & 0 deletions
@@ -49,6 +49,8 @@ src/log/
 
 log/
 
+*.log
+
 .risingwave/
 .bin/
e2e_test/sink/iceberg_sink.slt

Lines changed: 4 additions & 2 deletions
@@ -8,8 +8,10 @@ statement ok
 CREATE SINK s6 AS select mv6.v1 as v1, mv6.v2 as v2, mv6.v3 as v3 from mv6 WITH (
     connector = 'iceberg',
     sink.mode='append-only',
-    location.type='minio',
-    warehouse.path='minio://hummockadmin:hummockadmin@127.0.0.1:9301/iceberg',
+    warehouse.path = 's3://iceberg',
+    s3.endpoint = 'http://127.0.0.1:9301',
+    s3.access.key = 'hummockadmin',
+    s3.secret.key = 'hummockadmin',
     database.name='demo_db',
     table.name='demo_table'
 );
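With this change, MinIO is addressed as an ordinary S3-compatible store: the MinIO-specific location.type property is gone, and the endpoint and credentials that were previously packed into a minio:// URL move into the dedicated s3.endpoint, s3.access.key, and s3.secret.key properties, leaving warehouse.path as a plain s3:// bucket path. The integration-test SQL and Python client below follow the same shape. As a minimal, purely illustrative sketch (mirroring the Map.of style of the connector's Java tests), the WITH clause above reaches the connector node as a flat property map like this:

    import java.util.Map;

    // Illustrative only: the WITH clause above, as the flat property map
    // that IcebergSinkFactory.validate() and create() consume.
    Map<String, String> tableProperties = Map.of(
            "sink.mode", "append-only",
            "warehouse.path", "s3://iceberg",
            "s3.endpoint", "http://127.0.0.1:9301",
            "s3.access.key", "hummockadmin",
            "s3.secret.key", "hummockadmin",
            "database.name", "demo_db",
            "table.name", "demo_table");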

integration_tests/iceberg-sink/create_sink.sql

Lines changed: 4 additions & 2 deletions
@@ -3,8 +3,10 @@ FROM
 bhv_mv WITH (
     connector = 'iceberg',
     sink.mode='upsert',
-    location.type='minio',
-    warehouse.path='minio://hummockadmin:hummockadmin@minio-0:9301/hummock001/iceberg-data',
+    warehouse.path = 's3://hummock001/iceberg-data',
+    s3.endpoint = 'http://minio-0:9301',
+    s3.access.key = 'hummockadmin',
+    s3.secret.key = 'hummockadmin',
     database.name='demo_db',
     table.name='demo_table'
 );

java/connector-node/python-client/integration_tests.py

Lines changed: 8 additions & 4 deletions
@@ -161,17 +161,21 @@ def test_print_sink(input_file):
 def test_iceberg_sink(input_file):
     test_sink("iceberg",
               {"sink.mode":"append-only",
-               "location.type":"minio",
-               "warehouse.path":"minio://minioadmin:minioadmin@127.0.0.1:9000/bucket",
+               "warehouse.path":"s3a://bucket",
+               "s3.endpoint": "http://127.0.0.1:9000",
+               "s3.access.key": "minioadmin",
+               "s3.secret.key": "minioadmin",
               "database.name":"demo_db",
               "table.name":"demo_table"},
               input_file)
 
 def test_upsert_iceberg_sink(input_file):
     test_upsert_sink("iceberg",
               {"sink.mode":"upsert",
-               "location.type":"minio",
-               "warehouse.path":"minio://minioadmin:minioadmin@127.0.0.1:9000/bucket",
+               "warehouse.path":"s3a://bucket",
+               "s3.endpoint": "http://127.0.0.1:9000",
+               "s3.access.key": "minioadmin",
+               "s3.secret.key": "minioadmin",
               "database.name":"demo_db",
               "table.name":"demo_table"},
               input_file)
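Note that the Python tests pass warehouse.path with an explicit s3a:// scheme, while the SQL examples above use s3://; both spellings are accepted because the factory normalizes s3:// to s3a:// before handing the path to the Hadoop catalog (see getWarehousePath in the Java diff below). A minimal sketch of that normalization, with a hypothetical standalone helper name:

    // Hypothetical standalone version of the s3:// -> s3a:// unification
    // performed by IcebergSinkFactory.getWarehousePath.
    static String unifyScheme(String warehousePath) {
        return warehousePath.startsWith("s3://")
                ? warehousePath.replace("s3://", "s3a://")
                : warehousePath;
    }

    // unifyScheme("s3://bucket")  -> "s3a://bucket"
    // unifyScheme("s3a://bucket") -> "s3a://bucket" (unchanged)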

java/connector-node/risingwave-sink-iceberg/src/main/java/com/risingwave/connector/IcebergSinkFactory.java

Lines changed: 81 additions & 35 deletions
@@ -20,8 +20,9 @@
 import com.risingwave.connector.api.TableSchema;
 import com.risingwave.connector.api.sink.SinkBase;
 import com.risingwave.connector.api.sink.SinkFactory;
-import com.risingwave.java.utils.MinioUrlParser;
 import io.grpc.Status;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.Map;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
@@ -38,11 +39,15 @@ public class IcebergSinkFactory implements SinkFactory {
     private static final Logger LOG = LoggerFactory.getLogger(IcebergSinkFactory.class);
 
     public static final String SINK_MODE_PROP = "sink.mode";
-    public static final String LOCATION_TYPE_PROP = "location.type";
     public static final String WAREHOUSE_PATH_PROP = "warehouse.path";
     public static final String DATABASE_NAME_PROP = "database.name";
     public static final String TABLE_NAME_PROP = "table.name";
+    public static final String S3_ACCESS_KEY_PROP = "s3.access.key";
+    public static final String S3_SECRET_KEY_PROP = "s3.secret.key";
+    public static final String S3_ENDPOINT_PROP = "s3.endpoint";
     public static final FileFormat FILE_FORMAT = FileFormat.PARQUET;
+
+    // hadoop catalog config
     private static final String confEndpoint = "fs.s3a.endpoint";
     private static final String confKey = "fs.s3a.access.key";
     private static final String confSecret = "fs.s3a.secret.key";
@@ -56,83 +61,88 @@ public SinkBase create(TableSchema tableSchema, Map<String, String> tableProperties) {
         validate(tableSchema, tableProperties);
 
         String mode = tableProperties.get(SINK_MODE_PROP);
-        String location = tableProperties.get(LOCATION_TYPE_PROP);
-        String warehousePath = tableProperties.get(WAREHOUSE_PATH_PROP);
+        String warehousePath = getWarehousePath(tableProperties);
         String databaseName = tableProperties.get(DATABASE_NAME_PROP);
         String tableName = tableProperties.get(TABLE_NAME_PROP);
 
+        String scheme = parseWarehousePathScheme(warehousePath);
+
         TableIdentifier tableIdentifier = TableIdentifier.of(databaseName, tableName);
-        HadoopCatalog hadoopCatalog = createHadoopCatalog(location, warehousePath);
+        Configuration hadoopConf = createHadoopConf(scheme, tableProperties);
+        HadoopCatalog hadoopCatalog = new HadoopCatalog(hadoopConf, warehousePath);
         Table icebergTable;
         try {
             icebergTable = hadoopCatalog.loadTable(tableIdentifier);
         } catch (Exception e) {
-            LOG.error("load table error: {}", e);
             throw Status.FAILED_PRECONDITION
-                    .withDescription("failed to load iceberg table")
+                    .withDescription(
+                            String.format("failed to load iceberg table: %s", e.getMessage()))
                     .withCause(e)
                     .asRuntimeException();
         }
 
         if (mode.equals("append-only")) {
             return new IcebergSink(tableSchema, hadoopCatalog, icebergTable, FILE_FORMAT);
         } else if (mode.equals("upsert")) {
-            return new UpsertIcebergSink(tableSchema, hadoopCatalog, icebergTable, FILE_FORMAT);
+            return new UpsertIcebergSink(
+                    tableSchema, hadoopCatalog,
+                    icebergTable, FILE_FORMAT);
         }
         throw UNIMPLEMENTED.withDescription("unsupported mode: " + mode).asRuntimeException();
     }
 
     @Override
     public void validate(TableSchema tableSchema, Map<String, String> tableProperties) {
         if (!tableProperties.containsKey(SINK_MODE_PROP) // only append-only, upsert
-                || !tableProperties.containsKey(LOCATION_TYPE_PROP) // only local, s3, minio
                 || !tableProperties.containsKey(WAREHOUSE_PATH_PROP)
                 || !tableProperties.containsKey(DATABASE_NAME_PROP)
                 || !tableProperties.containsKey(TABLE_NAME_PROP)) {
             throw INVALID_ARGUMENT
                     .withDescription(
                             String.format(
-                                    "%s, %s, %s, %s or %s is not specified",
+                                    "%s, %s, %s or %s is not specified",
                                     SINK_MODE_PROP,
-                                    LOCATION_TYPE_PROP,
                                     WAREHOUSE_PATH_PROP,
                                     DATABASE_NAME_PROP,
                                     TABLE_NAME_PROP))
                     .asRuntimeException();
         }
 
         String mode = tableProperties.get(SINK_MODE_PROP);
-        String location = tableProperties.get(LOCATION_TYPE_PROP);
-        String warehousePath = tableProperties.get(WAREHOUSE_PATH_PROP);
         String databaseName = tableProperties.get(DATABASE_NAME_PROP);
         String tableName = tableProperties.get(TABLE_NAME_PROP);
+        String warehousePath = getWarehousePath(tableProperties);
+
+        String schema = parseWarehousePathScheme(warehousePath);
 
         TableIdentifier tableIdentifier = TableIdentifier.of(databaseName, tableName);
-        HadoopCatalog hadoopCatalog = createHadoopCatalog(location, warehousePath);
+        Configuration hadoopConf = createHadoopConf(schema, tableProperties);
+        HadoopCatalog hadoopCatalog = new HadoopCatalog(hadoopConf, warehousePath);
         Table icebergTable;
         try {
             icebergTable = hadoopCatalog.loadTable(tableIdentifier);
         } catch (Exception e) {
-            LOG.error("load table error: {}", e);
             throw Status.FAILED_PRECONDITION
-                    .withDescription("failed to load iceberg table")
+                    .withDescription(
+                            String.format("failed to load iceberg table: %s", e.getMessage()))
                     .withCause(e)
                     .asRuntimeException();
         }
         // check that all columns in tableSchema exist in the iceberg table
         for (String columnName : tableSchema.getColumnNames()) {
             if (icebergTable.schema().findField(columnName) == null) {
-                LOG.error("column not found: {}", columnName);
                 throw Status.FAILED_PRECONDITION
-                        .withDescription("table schema does not match")
+                        .withDescription(
+                                String.format(
                                        "table schema does not match. Column %s not found in iceberg table",
                                        columnName))
                        .asRuntimeException();
             }
         }
         // check that all required columns in the iceberg table exist in tableSchema
         Set<String> columnNames = Set.of(tableSchema.getColumnNames());
         for (Types.NestedField column : icebergTable.schema().columns()) {
             if (column.isRequired() && !columnNames.contains(column.name())) {
-                LOG.error("required column not found: {}", column.name());
                 throw Status.FAILED_PRECONDITION
                         .withDescription(
                                 String.format("missing a required field %s", column.name()))
@@ -153,26 +163,62 @@ public void validate(TableSchema tableSchema, Map<String, String> tableProperties) {
         }
     }
 
-    private HadoopCatalog createHadoopCatalog(String location, String warehousePath) {
-        Configuration hadoopConf = new Configuration();
-        switch (location) {
-            case "local":
-                return new HadoopCatalog(hadoopConf, warehousePath);
-            case "s3":
-                hadoopConf.set(confIoImpl, s3FileIOImpl);
-                String s3aPath = "s3a:" + warehousePath.substring(warehousePath.indexOf('/'));
-                return new HadoopCatalog(hadoopConf, s3aPath);
-            case "minio":
+    private static String getWarehousePath(Map<String, String> tableProperties) {
+        String warehousePath = tableProperties.get(WAREHOUSE_PATH_PROP);
+        // unify s3 and s3a
+        if (warehousePath.startsWith("s3://")) {
+            return warehousePath.replace("s3://", "s3a://");
+        }
+        return warehousePath;
+    }
+
+    private static String parseWarehousePathScheme(String warehousePath) {
+        try {
+            URI uri = new URI(warehousePath);
+            String scheme = uri.getScheme();
+            if (scheme == null) {
+                throw INVALID_ARGUMENT
+                        .withDescription("warehouse path should set scheme (e.g. s3a://)")
+                        .asRuntimeException();
+            }
+            return scheme;
+        } catch (URISyntaxException e) {
+            throw INVALID_ARGUMENT
+                    .withDescription(
+                            String.format("invalid warehouse path uri: %s", e.getMessage()))
+                    .withCause(e)
+                    .asRuntimeException();
+        }
+    }
+
+    private Configuration createHadoopConf(String scheme, Map<String, String> tableProperties) {
+        switch (scheme) {
+            case "file":
+                return new Configuration();
+            case "s3a":
+                Configuration hadoopConf = new Configuration();
                 hadoopConf.set(confIoImpl, s3FileIOImpl);
-                MinioUrlParser minioUrlParser = new MinioUrlParser(warehousePath);
-                hadoopConf.set(confEndpoint, minioUrlParser.getEndpoint());
-                hadoopConf.set(confKey, minioUrlParser.getKey());
-                hadoopConf.set(confSecret, minioUrlParser.getSecret());
                 hadoopConf.setBoolean(confPathStyleAccess, true);
-                return new HadoopCatalog(hadoopConf, "s3a://" + minioUrlParser.getBucket());
+                if (!tableProperties.containsKey(S3_ENDPOINT_PROP)) {
+                    throw INVALID_ARGUMENT
+                            .withDescription(
+                                    String.format(
+                                            "Should set %s for warehouse with scheme %s",
+                                            S3_ENDPOINT_PROP, scheme))
+                            .asRuntimeException();
+                }
+                hadoopConf.set(confEndpoint, tableProperties.get(S3_ENDPOINT_PROP));
+                if (tableProperties.containsKey(S3_ACCESS_KEY_PROP)) {
+                    hadoopConf.set(confKey, tableProperties.get(S3_ACCESS_KEY_PROP));
+                }
+                if (tableProperties.containsKey(S3_SECRET_KEY_PROP)) {
+                    hadoopConf.set(confSecret, tableProperties.get(S3_SECRET_KEY_PROP));
                }
+                return hadoopConf;
             default:
                 throw UNIMPLEMENTED
-                        .withDescription("unsupported iceberg sink type: " + location)
+                        .withDescription(
+                                String.format("scheme %s not supported for warehouse path", scheme))
                         .asRuntimeException();
         }
     }
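Taken together, the new helpers replace the old dispatch on location.type with a dispatch on the warehouse path's URI scheme: getWarehousePath unifies s3:// and s3a://, parseWarehousePathScheme extracts the scheme, and createHadoopConf maps the s3.* sink properties onto fs.s3a.* Hadoop keys. The following is a self-contained sketch of that flow, not the connector's actual code: gRPC Status errors are replaced with plain exceptions and the Hadoop Configuration with a plain map so it compiles without the connector's dependencies, and "fs.s3a.path.style.access" is assumed to be the value of the confPathStyleAccess constant (not shown in this diff):

    import java.net.URI;
    import java.net.URISyntaxException;
    import java.util.HashMap;
    import java.util.Map;

    public class WarehouseConfSketch {
        // Mirrors getWarehousePath: unify s3:// and s3a:// up front.
        static String warehousePath(Map<String, String> props) {
            String path = props.get("warehouse.path");
            return path.startsWith("s3://") ? path.replace("s3://", "s3a://") : path;
        }

        // Mirrors parseWarehousePathScheme: the URI scheme alone selects the branch.
        static String scheme(String warehousePath) {
            try {
                String scheme = new URI(warehousePath).getScheme();
                if (scheme == null) {
                    throw new IllegalArgumentException(
                            "warehouse path should set scheme (e.g. s3a://)");
                }
                return scheme;
            } catch (URISyntaxException e) {
                throw new IllegalArgumentException("invalid warehouse path uri", e);
            }
        }

        // Mirrors createHadoopConf, but collects the settings in a plain map.
        // The real factory also sets the Iceberg FileIO implementation
        // (confIoImpl / s3FileIOImpl, whose values are not shown in this diff).
        static Map<String, String> hadoopConf(String scheme, Map<String, String> props) {
            Map<String, String> conf = new HashMap<>();
            switch (scheme) {
                case "file":
                    return conf; // a local warehouse needs no extra configuration
                case "s3a":
                    // Path-style access is what makes MinIO-style endpoints work.
                    conf.put("fs.s3a.path.style.access", "true");
                    if (!props.containsKey("s3.endpoint")) {
                        throw new IllegalArgumentException(
                                "Should set s3.endpoint for warehouse with scheme s3a");
                    }
                    conf.put("fs.s3a.endpoint", props.get("s3.endpoint"));
                    // Credentials are optional; absent keys fall back to whatever
                    // the Hadoop credential chain provides.
                    if (props.containsKey("s3.access.key")) {
                        conf.put("fs.s3a.access.key", props.get("s3.access.key"));
                    }
                    if (props.containsKey("s3.secret.key")) {
                        conf.put("fs.s3a.secret.key", props.get("s3.secret.key"));
                    }
                    return conf;
                default:
                    throw new UnsupportedOperationException(
                            "scheme " + scheme + " not supported for warehouse path");
            }
        }

        public static void main(String[] args) {
            Map<String, String> props = Map.of(
                    "warehouse.path", "s3://hummock001/iceberg-data",
                    "s3.endpoint", "http://minio-0:9301",
                    "s3.access.key", "hummockadmin",
                    "s3.secret.key", "hummockadmin");
            String path = warehousePath(props); // "s3a://hummock001/iceberg-data"
            System.out.println(hadoopConf(scheme(path), props));
        }
    }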

java/connector-node/risingwave-sink-iceberg/src/test/java/com/risingwave/connector/IcebergSinkFactoryTest.java

Lines changed: 1 addition & 4 deletions
@@ -30,10 +30,9 @@
 import org.junit.Test;
 
 public class IcebergSinkFactoryTest {
-    static String warehousePath = "/tmp/rw-sinknode/iceberg-sink/warehouse";
+    static String warehousePath = "file:///tmp/rw-sinknode/iceberg-sink/warehouse";
     static String databaseName = "demo_db";
     static String tableName = "demo_table";
-    static String locationType = "local";
     static String sinkMode = "append-only";
     static Schema icebergTableSchema =
             new Schema(
@@ -67,8 +66,6 @@ public void testCreate() throws IOException {
                 Map.of(
                         IcebergSinkFactory.SINK_MODE_PROP,
                         sinkMode,
-                        IcebergSinkFactory.LOCATION_TYPE_PROP,
-                        locationType,
                         IcebergSinkFactory.WAREHOUSE_PATH_PROP,
                         warehousePath,
                         IcebergSinkFactory.DATABASE_NAME_PROP,
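With location.type removed, the test expresses its storage choice entirely through the path, which is why the bare /tmp path gains an explicit file:// scheme: a scheme-less path would now be rejected by parseWarehousePathScheme. Purely as an illustration (the visible diff context ends at DATABASE_NAME_PROP; the trailing table-name pair is assumed), the reduced test property map looks like:

    // Illustrative reconstruction of the test's property map after this commit.
    Map<String, String> tableProperties = Map.of(
            IcebergSinkFactory.SINK_MODE_PROP, "append-only",
            IcebergSinkFactory.WAREHOUSE_PATH_PROP,
            "file:///tmp/rw-sinknode/iceberg-sink/warehouse",
            IcebergSinkFactory.DATABASE_NAME_PROP, "demo_db",
            IcebergSinkFactory.TABLE_NAME_PROP, "demo_table");
    // A bare "/tmp/..." path would fail with
    // "warehouse path should set scheme (e.g. s3a://)".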