
Commit 80be523

add option
1 parent 93fb9d4 commit 80be523

10 files changed (+78 -40 lines changed)


integration_tests/big-query-sink/create_sink.sql (+1 -1)

@@ -23,7 +23,7 @@ FROM
 -- bigquery.dataset= '${dataset_id}',
 -- bigquery.table= '${table_id}',
 -- access_key = '${aws_access_key}',
--- secret_access = '${aws_secret_access}',
+-- secret_key = '${aws_secret_key}',
 -- region = '${aws_region}',
 -- force_append_only='true',
 -- );

integration_tests/cassandra-and-scylladb-sink/create_source.sql (+2 -2)

@@ -11,10 +11,10 @@ CREATE table user_behaviors (
 connector = 'datagen',
 fields.user_id.kind = 'sequence',
 fields.user_id.start = '1',
-fields.user_id.end = '10000000',
+fields.user_id.end = '1000',
 fields.user_name.kind = 'random',
 fields.user_name.length = '10',
-datagen.rows.per.second = '1000000'
+datagen.rows.per.second = '10'
 ) FORMAT PLAIN ENCODE JSON;

 CREATE TABLE cassandra_types (

java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraConfig.java (+19)

@@ -45,6 +45,9 @@ public class CassandraConfig extends CommonSinkConfig {
     @JsonProperty(value = "cassandra.max_batch_rows")
     private Integer maxBatchRows = 512;

+    @JsonProperty(value = "cassandra.request_timeout_ms")
+    private Integer requestTimeoutMs = 2000;
+
     @JsonCreator
     public CassandraConfig(
             @JsonProperty(value = "cassandra.url") String url,
@@ -102,7 +105,23 @@ public Integer getMaxBatchRows() {
     }

     public CassandraConfig withMaxBatchRows(Integer maxBatchRows) {
+        if (maxBatchRows > 65536 || maxBatchRows < 1) {
+            throw new IllegalArgumentException(
+                    "cassandra.max_batch_rows must be <= 65535 and >= 1");
+        }
         this.maxBatchRows = maxBatchRows;
         return this;
     }
+
+    public Integer getRequestTimeoutMs() {
+        return requestTimeoutMs;
+    }
+
+    public CassandraConfig withRequestTimeoutMs(Integer requestTimeoutMs) {
+        if (requestTimeoutMs < 1) {
+            throw new IllegalArgumentException("cassandra.request_timeout_ms must be >= 1");
+        }
+        this.requestTimeoutMs = requestTimeoutMs;
+        return this;
+    }
 }
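
The new `cassandra.request_timeout_ms` option (default 2000) joins the existing `cassandra.max_batch_rows` (default 512), and both setters now reject out-of-range values. A rough Rust rendering of those validation rules, purely for illustration (the commit's actual code is the Java above; the struct and method names here are hypothetical):

```rust
// Hypothetical sketch of the two bounded Cassandra sink options and the
// range checks the Java setters enforce (batch rows in 1..=65536, timeout >= 1 ms).
#[derive(Debug)]
struct CassandraSinkOptions {
    max_batch_rows: u32,     // cassandra.max_batch_rows, default 512
    request_timeout_ms: u32, // cassandra.request_timeout_ms, default 2000
}

impl CassandraSinkOptions {
    fn with_max_batch_rows(mut self, rows: u32) -> Result<Self, String> {
        if !(1..=65536).contains(&rows) {
            return Err("cassandra.max_batch_rows must be between 1 and 65536".into());
        }
        self.max_batch_rows = rows;
        Ok(self)
    }

    fn with_request_timeout_ms(mut self, ms: u32) -> Result<Self, String> {
        if ms < 1 {
            return Err("cassandra.request_timeout_ms must be >= 1".into());
        }
        self.request_timeout_ms = ms;
        Ok(self)
    }
}

fn main() {
    let opts = CassandraSinkOptions { max_batch_rows: 512, request_timeout_ms: 2000 }
        .with_max_batch_rows(1024)
        .and_then(|o| o.with_request_timeout_ms(5000))
        .expect("options within bounds");
    println!("{opts:?}");
}
```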

java/connector-node/risingwave-sink-cassandra/src/main/java/com/risingwave/connector/CassandraSink.java (+9)

@@ -18,6 +18,8 @@

 import com.datastax.oss.driver.api.core.CqlSession;
 import com.datastax.oss.driver.api.core.CqlSessionBuilder;
+import com.datastax.oss.driver.api.core.config.DefaultDriverOption;
+import com.datastax.oss.driver.api.core.config.DriverConfigLoader;
 import com.datastax.oss.driver.api.core.cql.*;
 import com.risingwave.connector.api.TableSchema;
 import com.risingwave.connector.api.sink.SinkRow;
@@ -50,9 +52,16 @@ public CassandraSink(TableSchema tableSchema, CassandraConfig config) {
             throw new IllegalArgumentException(
                     "Invalid cassandraURL: expected `host:port`, got " + url);
         }
+
+        DriverConfigLoader loader =
+                DriverConfigLoader.programmaticBuilder()
+                        .withInt(DefaultDriverOption.REQUEST_TIMEOUT, config.getRequestTimeoutMs())
+                        .build();
+
         // check connection
         CqlSessionBuilder sessionBuilder =
                 CqlSession.builder()
+                        .withConfigLoader(loader)
                         .addContactPoint(
                                 new InetSocketAddress(hostPort[0], Integer.parseInt(hostPort[1])))
                         .withKeyspace(config.getKeyspace())

src/connector/src/common.rs (+1 -1)

@@ -100,7 +100,7 @@ impl AwsAuthProps {
                 ),
             ))
         } else {
-            bail!("Both \"access_key\" and \"secret_access\" are required.")
+            bail!("Both \"access_key\" and \"secret_key\" are required.")
         }
     }
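
The message change tracks the option rename from `secret_access` to `secret_key` (mirrored in the BigQuery example SQL above); static AWS credentials are only accepted when both keys are supplied. A standalone Rust sketch of that both-or-neither rule, with hypothetical names (the real `AwsAuthProps` has more fields and surrounding logic that is elided here):

```rust
// Hypothetical sketch: accept static AWS credentials only when both
// `access_key` and `secret_key` are set; reject a lone key with the same
// error message the connector now uses.
use anyhow::{bail, Result};

struct AwsAuthOptions {
    access_key: Option<String>,
    secret_key: Option<String>, // renamed from `secret_access` in this commit
}

fn static_credentials(opts: &AwsAuthOptions) -> Result<Option<(String, String)>> {
    match (&opts.access_key, &opts.secret_key) {
        (Some(ak), Some(sk)) => Ok(Some((ak.clone(), sk.clone()))),
        // Neither key set: leave credential resolution to other sources.
        (None, None) => Ok(None),
        _ => bail!("Both \"access_key\" and \"secret_key\" are required."),
    }
}

fn main() {
    let only_access = AwsAuthOptions {
        access_key: Some("my-access-key".into()),
        secret_key: None,
    };
    assert!(static_credentials(&only_access).is_err());
}
```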

src/connector/src/sink/big_query.rs (+13 -14)

@@ -33,10 +33,7 @@ use url::Url;
 use with_options::WithOptions;
 use yup_oauth2::ServiceAccountKey;

-use super::encoder::{
-    DateHandlingMode, JsonEncoder, RowEncoder, TimeHandlingMode, TimestampHandlingMode,
-    TimestamptzHandlingMode,
-};
+use super::encoder::{JsonEncoder, RowEncoder};
 use super::writer::LogSinkerOf;
 use super::{SinkError, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
 use crate::aws_utils::load_file_descriptor_from_s3;
@@ -47,7 +44,6 @@ use crate::sink::{
 };

 pub const BIGQUERY_SINK: &str = "bigquery";
-const BIGQUERY_INSERT_MAX_NUMS: usize = 1024;

 #[derive(Deserialize, Debug, Clone, WithOptions)]
 pub struct BigQueryCommon {
@@ -61,6 +57,12 @@
     pub dataset: String,
     #[serde(rename = "bigquery.table")]
     pub table: String,
+    #[serde(rename = "bigquery.max_batch_rows", default = "default_max_batch_rows")]
+    pub max_batch_rows: usize,
+}
+
+fn default_max_batch_rows() -> usize {
+    1024
 }

 impl BigQueryCommon {
@@ -312,14 +314,7 @@ impl BigQuerySinkWriter {
             client,
             is_append_only,
             insert_request: TableDataInsertAllRequest::new(),
-            row_encoder: JsonEncoder::new(
-                schema,
-                None,
-                DateHandlingMode::String,
-                TimestampHandlingMode::String,
-                TimestamptzHandlingMode::UtcString,
-                TimeHandlingMode::Milli,
-            ),
+            row_encoder: JsonEncoder::new_with_bigquery(schema, None),
         })
     }

@@ -339,7 +334,11 @@
         self.insert_request
             .add_rows(insert_vec)
             .map_err(|e| SinkError::BigQuery(e.into()))?;
-        if self.insert_request.len().ge(&BIGQUERY_INSERT_MAX_NUMS) {
+        if self
+            .insert_request
+            .len()
+            .ge(&self.config.common.max_batch_rows)
+        {
             self.insert_data().await?;
         }
         Ok(())
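
Two things change here: the insert-batch flush threshold, previously the hard-coded `BIGQUERY_INSERT_MAX_NUMS` of 1024, becomes the user-facing `bigquery.max_batch_rows` option, and the row encoder moves to a dedicated `new_with_bigquery` constructor. Because the new field uses a serde `default` function, sinks created without the option keep the old threshold. A minimal, self-contained sketch of that defaulting mechanism (standalone struct, not the real `BigQueryCommon`, and plain JSON input rather than RisingWave's actual option parsing):

```rust
// Sketch of serde's `default` attribute: omitting `bigquery.max_batch_rows`
// falls back to 1024, matching the previously hard-coded constant.
use serde::Deserialize;

#[derive(Deserialize, Debug)]
struct BigQueryBatchOptions {
    #[serde(rename = "bigquery.max_batch_rows", default = "default_max_batch_rows")]
    max_batch_rows: usize,
}

fn default_max_batch_rows() -> usize {
    1024
}

fn main() {
    // Option omitted: the default function supplies 1024.
    let defaulted: BigQueryBatchOptions = serde_json::from_str("{}").unwrap();
    assert_eq!(defaulted.max_batch_rows, 1024);

    // Option provided: the user's value later gates the flush
    // (`insert_request.len() >= max_batch_rows`).
    let configured: BigQueryBatchOptions =
        serde_json::from_str(r#"{"bigquery.max_batch_rows": 256}"#).unwrap();
    assert_eq!(configured.max_batch_rows, 256);
}
```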

src/connector/src/sink/doris.rs (+2 -7)

@@ -39,7 +39,7 @@ use super::doris_starrocks_connector::{
     POOL_IDLE_TIMEOUT,
 };
 use super::{Result, SinkError, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
-use crate::sink::encoder::{JsonEncoder, RowEncoder, TimestampHandlingMode};
+use crate::sink::encoder::{JsonEncoder, RowEncoder};
 use crate::sink::writer::{LogSinkerOf, SinkWriterExt};
 use crate::sink::{DummySinkCommitCoordinator, Sink, SinkParam, SinkWriter, SinkWriterParam};

@@ -294,12 +294,7 @@ impl DorisSinkWriter {
             inserter_inner_builder: doris_insert_builder,
             is_append_only,
             client: None,
-            row_encoder: JsonEncoder::new_with_doris(
-                schema,
-                None,
-                TimestampHandlingMode::String,
-                decimal_map,
-            ),
+            row_encoder: JsonEncoder::new_with_doris(schema, None, decimal_map),
         })
     }

src/connector/src/sink/encoder/json.rs (+27 -8)

@@ -83,15 +83,14 @@ impl JsonEncoder {
     pub fn new_with_doris(
         schema: Schema,
         col_indices: Option<Vec<usize>>,
-        timestamp_handling_mode: TimestampHandlingMode,
         map: HashMap<String, (u8, u8)>,
     ) -> Self {
         Self {
             schema,
             col_indices,
             time_handling_mode: TimeHandlingMode::Milli,
             date_handling_mode: DateHandlingMode::String,
-            timestamp_handling_mode,
+            timestamp_handling_mode: TimestampHandlingMode::String,
             timestamptz_handling_mode: TimestamptzHandlingMode::UtcWithoutSuffix,
             custom_json_type: CustomJsonType::Doris(map),
             kafka_connect: None,
@@ -101,21 +100,33 @@
     pub fn new_with_starrocks(
         schema: Schema,
         col_indices: Option<Vec<usize>>,
-        timestamp_handling_mode: TimestampHandlingMode,
         map: HashMap<String, (u8, u8)>,
     ) -> Self {
         Self {
             schema,
             col_indices,
             time_handling_mode: TimeHandlingMode::Milli,
             date_handling_mode: DateHandlingMode::String,
-            timestamp_handling_mode,
+            timestamp_handling_mode: TimestampHandlingMode::String,
             timestamptz_handling_mode: TimestamptzHandlingMode::UtcWithoutSuffix,
             custom_json_type: CustomJsonType::StarRocks(map),
             kafka_connect: None,
         }
     }

+    pub fn new_with_bigquery(schema: Schema, col_indices: Option<Vec<usize>>) -> Self {
+        Self {
+            schema,
+            col_indices,
+            time_handling_mode: TimeHandlingMode::Milli,
+            date_handling_mode: DateHandlingMode::String,
+            timestamp_handling_mode: TimestampHandlingMode::String,
+            timestamptz_handling_mode: TimestamptzHandlingMode::UtcString,
+            custom_json_type: CustomJsonType::BigQuery,
+            kafka_connect: None,
+        }
+    }
+
     pub fn with_kafka_connect(self, kafka_connect: KafkaConnectParams) -> Self {
         Self {
             kafka_connect: Some(Arc::new(kafka_connect)),
@@ -192,7 +203,15 @@
     custom_json_type: &CustomJsonType,
 ) -> ArrayResult<Value> {
     let scalar_ref = match datum {
-        None => return Ok(Value::Null),
+        None => {
+            if let CustomJsonType::BigQuery = custom_json_type
+                && matches!(field.data_type(), DataType::List(_))
+            {
+                return Ok(Value::Array(vec![]));
+            } else {
+                return Ok(Value::Null);
+            }
+        }
         Some(datum) => datum,
     };

@@ -239,7 +258,7 @@
             }
             json!(v_string)
         }
-        CustomJsonType::Es | CustomJsonType::None => {
+        CustomJsonType::Es | CustomJsonType::None | CustomJsonType::BigQuery => {
             json!(v.to_text())
         }
     },
@@ -291,7 +310,7 @@
         }
         (DataType::Jsonb, ScalarRefImpl::Jsonb(jsonb_ref)) => match custom_json_type {
             CustomJsonType::Es | CustomJsonType::StarRocks(_) => JsonbVal::from(jsonb_ref).take(),
-            CustomJsonType::Doris(_) | CustomJsonType::None => {
+            CustomJsonType::Doris(_) | CustomJsonType::None | CustomJsonType::BigQuery => {
                 json!(jsonb_ref.to_string())
             }
         },
@@ -342,7 +361,7 @@
                 "starrocks can't support struct".to_string(),
             ));
         }
-        CustomJsonType::Es | CustomJsonType::None => {
+        CustomJsonType::Es | CustomJsonType::None | CustomJsonType::BigQuery => {
             let mut map = Map::with_capacity(st.len());
             for (sub_datum_ref, sub_field) in struct_ref.iter_fields_ref().zip_eq_debug(
                 st.iter()
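
`new_with_doris` and `new_with_starrocks` no longer take a `timestamp_handling_mode` argument (both always used `TimestampHandlingMode::String`, so it is now fixed internally), and the new `new_with_bigquery` constructor selects the `CustomJsonType::BigQuery` variant. That variant changes one encoding rule: a NULL datum in a list-typed column is emitted as `[]` instead of JSON `null`, presumably because BigQuery array (REPEATED) columns expect an empty array rather than null. A simplified, standalone sketch of that rule (hypothetical `DataType` enum; the real encoder works on RisingWave's field and datum types):

```rust
// Simplified sketch: for the BigQuery encoder, NULL in a list column becomes
// an empty JSON array; every other NULL stays a JSON null.
use serde_json::{json, Value};

enum DataType {
    Int64,
    List, // element type elided for brevity
}

fn encode_null_for_bigquery(data_type: DataType) -> Value {
    match data_type {
        DataType::List => Value::Array(vec![]), // NULL array -> []
        _ => Value::Null,
    }
}

fn main() {
    assert_eq!(encode_null_for_bigquery(DataType::List), json!([]));
    assert_eq!(encode_null_for_bigquery(DataType::Int64), Value::Null);
}
```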

src/connector/src/sink/encoder/mod.rs (+2)

@@ -144,6 +144,8 @@ pub enum CustomJsonType {
     Es,
     // starrocks' need jsonb is struct
     StarRocks(HashMap<String, (u8, u8)>),
+    // bigquery need null array -> []
+    BigQuery,
     None,
 }

src/connector/src/sink/starrocks.rs (+2 -7)

@@ -35,7 +35,7 @@ use with_options::WithOptions;
 use super::doris_starrocks_connector::{
     HeaderBuilder, InserterInner, InserterInnerBuilder, DORIS_SUCCESS_STATUS, STARROCKS_DELETE_SIGN,
 };
-use super::encoder::{JsonEncoder, RowEncoder, TimestampHandlingMode};
+use super::encoder::{JsonEncoder, RowEncoder};
 use super::writer::LogSinkerOf;
 use super::{SinkError, SinkParam, SINK_TYPE_APPEND_ONLY, SINK_TYPE_OPTION, SINK_TYPE_UPSERT};
 use crate::sink::writer::SinkWriterExt;
@@ -367,12 +367,7 @@ impl StarrocksSinkWriter {
             inserter_innet_builder: starrocks_insert_builder,
             is_append_only,
             client: None,
-            row_encoder: JsonEncoder::new_with_starrocks(
-                schema,
-                None,
-                TimestampHandlingMode::String,
-                decimal_map,
-            ),
+            row_encoder: JsonEncoder::new_with_starrocks(schema, None, decimal_map),
         })
     }
