Skip to content

Commit aba3232

Browse files
authored
fix(sink): fix sinking into Cassandra failing when a column name contains uppercase letters (#17493)
1 parent e383ad6 commit aba3232

File tree

7 files changed

+100
-51
lines changed

7 files changed

+100
-51
lines changed

ci/scripts/common.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,16 @@ get_latest_kafka_download_url() {
115115
local download_url="https://downloads.apache.org/kafka/${latest_version}/kafka_2.13-${latest_version}.tgz"
116116
echo "$download_url"
117117
}
118+
119+
#######################################
# Print the newest Cassandra release listed on downloads.apache.org.
# The directory listing is scraped for `href="X.Y.Z/"` entries and the
# highest version (by `sort -V`) is selected.
# Outputs: the latest X.Y.Z version on stdout; diagnostics on stderr
# Returns: 0 on success, 1 if the listing could not be fetched or
#          contained no X.Y.Z release directory
#######################################
get_latest_cassandra_version() {
  # Declare and assign separately: `local v=$(cmd)` always succeeds and
  # would hide a failed download / empty match.
  local versions
  versions=$(curl -s https://downloads.apache.org/cassandra/ \
    | grep -Eo 'href="[0-9]+\.[0-9]+\.[0-9]+/"' \
    | grep -Eo "[0-9]+\.[0-9]+\.[0-9]+") || {
    echo "failed to determine the latest cassandra version" >&2
    return 1
  }
  # Sort the version numbers and get the latest one
  local latest_version
  latest_version=$(echo "$versions" | sort -V | tail -n1)
  echo "$latest_version"
}
125+
126+
#######################################
# Print the download URL of the latest Cassandra binary tarball.
# Outputs: https://downloads.apache.org/cassandra/<ver>/apache-cassandra-<ver>-bin.tar.gz
#          on stdout; diagnostics on stderr
# Returns: 0 on success, 1 if the latest version could not be determined
#######################################
get_latest_cassandra_download_url() {
  # Declare and assign separately so a failing lookup is not masked by `local`.
  local latest_version
  latest_version=$(get_latest_cassandra_version) || return 1
  # An empty version would yield ".../cassandra//apache-cassandra--bin.tar.gz";
  # refuse to emit such a URL.
  if [ -z "$latest_version" ]; then
    echo "latest cassandra version is empty; cannot build download url" >&2
    return 1
  fi
  local download_url="https://downloads.apache.org/cassandra/${latest_version}/apache-cassandra-${latest_version}-bin.tar.gz"
  echo "$download_url"
}

ci/scripts/e2e-cassandra-sink-test.sh

Lines changed: 7 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -36,37 +36,21 @@ risedev ci-start ci-sink-test
3636
# Wait for the Cassandra server to start
3737
sleep 40
3838

39-
echo "--- create cassandra table"
40-
curl https://downloads.apache.org/cassandra/4.1.3/apache-cassandra-4.1.3-bin.tar.gz --output apache-cassandra-4.1.3-bin.tar.gz
41-
tar xfvz apache-cassandra-4.1.3-bin.tar.gz
39+
echo "--- install cassandra"
40+
wget $(get_latest_cassandra_download_url) -O cassandra_latest.tar.gz
41+
tar xfvz cassandra_latest.tar.gz
42+
export LATEST_CASSANDRA_VERSION=$(get_latest_cassandra_version)
43+
export CASSANDRA_DIR="./apache-cassandra-${LATEST_CASSANDRA_VERSION}"
4244
# Remove the bundled Python packages and use the pip-installed ones instead: Python 3.12 removed asyncore, and libev support could not be installed for the bundled Python driver.
43-
rm apache-cassandra-4.1.3/lib/six-1.12.0-py2.py3-none-any.zip
44-
rm apache-cassandra-4.1.3/lib/cassandra-driver-internal-only-3.25.0.zip
45+
rm ${CASSANDRA_DIR}/lib/six-1.12.0-py2.py3-none-any.zip
46+
rm ${CASSANDRA_DIR}/lib/cassandra-driver-internal-only-3.25.0.zip
4547
apt-get install -y libev4 libev-dev
4648
pip3 install --break-system-packages cassandra-driver
47-
48-
cd apache-cassandra-4.1.3/bin
4949
export CQLSH_HOST=cassandra-server
5050
export CQLSH_PORT=9042
51-
./cqlsh --request-timeout=20 -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo;
52-
CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);"
5351

5452
echo "--- testing sinks"
55-
cd ../../
5653
sqllogictest -p 4566 -d dev './e2e_test/sink/cassandra_sink.slt'
57-
sleep 1
58-
cd apache-cassandra-4.1.3/bin
59-
./cqlsh --request-timeout=20 -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';"
60-
61-
if cat ./query_result.csv | awk -F "," '{
62-
exit !($1 == 1 && $2 == 1 && $3 == 1 && $4 == 1.1 && $5 == 1.2 && $6 == "test" && $7 == "2013-01-01" && $8 == "2013-01-01 01:01:01.000+0000" && $9 == "False\r"); }'; then
63-
echo "Cassandra sink check passed"
64-
else
65-
echo "The output is not as expected."
66-
echo "output:"
67-
cat ./query_result.csv
68-
exit 1
69-
fi
7054

7155
echo "--- Kill cluster"
7256
cd ../../

ci/workflows/main-cron.yml

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -901,25 +901,24 @@ steps:
901901
timeout_in_minutes: 10
902902
retry: *auto-retry
903903

904-
# FIXME(xxhZs): https://github.com/risingwavelabs/risingwave/issues/17855
905-
# - label: "end-to-end cassandra sink test"
906-
# key: "e2e-cassandra-sink-tests"
907-
# command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release"
908-
# if: |
909-
# !(build.pull_request.labels includes "ci/main-cron/run-selected") && build.env("CI_STEPS") == null
910-
# || build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests"
911-
# || build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/
912-
# depends_on:
913-
# - "build"
914-
# - "build-other"
915-
# plugins:
916-
# - docker-compose#v5.1.0:
917-
# run: sink-test-env
918-
# config: ci/docker-compose.yml
919-
# mount-buildkite-agent: true
920-
# - ./ci/plugins/upload-failure-logs
921-
# timeout_in_minutes: 10
922-
# retry: *auto-retry
904+
- label: "end-to-end cassandra sink test"
905+
key: "e2e-cassandra-sink-tests"
906+
command: "ci/scripts/e2e-cassandra-sink-test.sh -p ci-release"
907+
if: |
908+
!(build.pull_request.labels includes "ci/main-cron/run-selected") && build.env("CI_STEPS") == null
909+
|| build.pull_request.labels includes "ci/run-e2e-cassandra-sink-tests"
910+
|| build.env("CI_STEPS") =~ /(^|,)e2e-cassandra-sink-tests?(,|$$)/
911+
depends_on:
912+
- "build"
913+
- "build-other"
914+
plugins:
915+
- docker-compose#v5.1.0:
916+
run: sink-test-env
917+
config: ci/docker-compose.yml
918+
mount-buildkite-agent: true
919+
- ./ci/plugins/upload-failure-logs
920+
timeout_in_minutes: 10
921+
retry: *auto-retry
923922

924923
- label: "end-to-end clickhouse sink test"
925924
key: "e2e-clickhouse-sink-tests"

e2e_test/sink/cassandra_sink.slt

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
1+
system ok
2+
${CASSANDRA_DIR}/bin/cqlsh --request-timeout=20 -e "CREATE KEYSPACE demo WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};use demo;CREATE table demo_bhv_table(v1 int primary key,v2 smallint,v3 bigint,v4 float,v5 double,v6 text,v7 date,v8 timestamp,v9 boolean);"
3+
4+
system ok
5+
${CASSANDRA_DIR}/bin/cqlsh --request-timeout=20 -e "use demo;CREATE table \"Test_uppercase\"(\"TEST_V1\" int primary key, \"TEST_V2\" int,\"TEST_V3\" int);"
6+
17
statement ok
28
CREATE TABLE t6 (v1 int primary key, v2 smallint, v3 bigint, v4 real, v5 float, v6 varchar, v7 date, v8 timestamptz, v9 boolean);
39

410
statement ok
5-
CREATE MATERIALIZED VIEW mv6 AS SELECT * FROM t6;
11+
CREATE TABLE t7 ("TEST_V1" int primary key, "TEST_V2" int, "TEST_V3" int);
612

713
statement ok
814
CREATE SINK s6
915
FROM
10-
mv6 WITH (
16+
t6 WITH (
1117
connector = 'cassandra',
1218
type = 'append-only',
1319
force_append_only='true',
@@ -17,17 +23,53 @@ FROM
1723
cassandra.datacenter = 'datacenter1',
1824
);
1925

26+
statement ok
27+
CREATE SINK s7
28+
FROM
29+
t7 WITH (
30+
connector = 'cassandra',
31+
type = 'append-only',
32+
force_append_only='true',
33+
cassandra.url = 'cassandra-server:9042',
34+
cassandra.keyspace = 'demo',
35+
cassandra.table = 'Test_uppercase',
36+
cassandra.datacenter = 'datacenter1',
37+
);
38+
2039
statement ok
2140
INSERT INTO t6 VALUES (1, 1, 1, 1.1, 1.2, 'test', '2013-01-01', '2013-01-01 01:01:01+00:00' , false);
2241

42+
statement ok
43+
INSERT INTO t7 VALUES (1, 1, 1);
44+
2345
statement ok
2446
FLUSH;
2547

2648
statement ok
2749
DROP SINK s6;
2850

2951
statement ok
30-
DROP MATERIALIZED VIEW mv6;
52+
DROP TABLE t6;
53+
54+
statement ok
55+
DROP SINK s7;
3156

3257
statement ok
33-
DROP TABLE t6;
58+
DROP TABLE t7;
59+
60+
system ok
61+
${CASSANDRA_DIR}/bin/cqlsh --request-timeout=20 -e "COPY demo.demo_bhv_table TO './query_result.csv' WITH HEADER = false AND ENCODING = 'UTF-8';"
62+
63+
system ok
64+
${CASSANDRA_DIR}/bin/cqlsh --request-timeout=20 -e "COPY demo.\"Test_uppercase\" TO './query_result2.csv' WITH HEADER = false AND ENCODING = 'UTF-8';"
65+
66+
system ok
67+
cat ./query_result.csv
68+
----
69+
1,1,1,1.1,1.2,test,2013-01-01,2013-01-01 01:01:01.000+0000,False
70+
71+
72+
system ok
73+
cat ./query_result2.csv
74+
----
75+
1,1,1

java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ public CassandraConfig(
5757
@JsonProperty(value = "type") String type) {
5858
this.url = url;
5959
this.keyspace = keyspace;
60-
this.table = table;
60+
this.table = CassandraUtil.convertCQLIdentifiers(table);
6161
this.datacenter = datacenter;
6262
this.type = type;
6363
}

java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraSink.java

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,8 +193,15 @@ public void drop() {
193193

194194
private String createInsertStatement(String tableName, TableSchema tableSchema) {
195195
String[] columnNames = tableSchema.getColumnNames();
196-
String columnNamesString = String.join(", ", columnNames);
196+
String columnNamesString =
197+
Arrays.stream(columnNames)
198+
.map(columnName -> CassandraUtil.convertCQLIdentifiers(columnName))
199+
.collect(Collectors.joining(", "));
197200
String placeholdersString = String.join(", ", Collections.nCopies(columnNames.length, "?"));
201+
System.out.println(
202+
String.format(
203+
"INSERT INTO %s (%s) VALUES (%s)",
204+
tableName, columnNamesString, placeholdersString));
198205
return String.format(
199206
"INSERT INTO %s (%s) VALUES (%s)",
200207
tableName, columnNamesString, placeholdersString);
@@ -204,11 +211,11 @@ private String createUpdateStatement(String tableName, TableSchema tableSchema)
204211
List<String> primaryKeys = tableSchema.getPrimaryKeys();
205212
String setClause = // cassandra does not allow SET on primary keys
206213
nonKeyColumns.stream()
207-
.map(columnName -> columnName + " = ?")
214+
.map(columnName -> CassandraUtil.convertCQLIdentifiers(columnName) + " = ?")
208215
.collect(Collectors.joining(", "));
209216
String whereClause =
210217
primaryKeys.stream()
211-
.map(columnName -> columnName + " = ?")
218+
.map(columnName -> CassandraUtil.convertCQLIdentifiers(columnName) + " = ?")
212219
.collect(Collectors.joining(" AND "));
213220
return String.format("UPDATE %s SET %s WHERE %s", tableName, setClause, whereClause);
214221
}
@@ -217,7 +224,7 @@ private static String createDeleteStatement(String tableName, TableSchema tableS
217224
List<String> primaryKeys = tableSchema.getPrimaryKeys();
218225
String whereClause =
219226
primaryKeys.stream()
220-
.map(columnName -> columnName + " = ?")
227+
.map(columnName -> CassandraUtil.convertCQLIdentifiers(columnName) + " = ?")
221228
.collect(Collectors.joining(" AND "));
222229
return String.format("DELETE FROM %s WHERE %s", tableName, whereClause);
223230
}

java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraUtil.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,4 +167,8 @@ public static Object convertRow(Object value, TypeName typeName) {
167167
.asRuntimeException();
168168
}
169169
}
170+
171+
public static String convertCQLIdentifiers(String identifier) {
172+
return "\"" + identifier + "\"";
173+
}
170174
}

0 commit comments

Comments
 (0)