Skip to content

Commit f16809d

Browse files
xiangjinwu and StrikeW authored
fix(expr): parse timestamptz without seconds but with offset (risingwavelabs#12084)
Co-authored-by: StrikeW <[email protected]>
1 parent 01ce1bb commit f16809d

File tree

11 files changed

+115
-30
lines changed

11 files changed

+115
-30
lines changed

clippy.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ disallowed-methods = [
88
{ path = "num_traits::sign::Signed::is_positive", reason = "This returns true for 0.0 but false for 0." },
99
{ path = "num_traits::sign::Signed::is_negative", reason = "This returns true for -0.0 but false for 0." },
1010
{ path = "num_traits::sign::Signed::signum", reason = "This returns 1.0 for 0.0 but 0 for 0." },
11+
{ path = "speedate::DateTime::parse_str", reason = "Please use `parse_str_rfc3339` instead." },
12+
{ path = "speedate::DateTime::parse_bytes", reason = "Please use `parse_bytes_rfc3339` instead." },
13+
{ path = "speedate::DateTime::parse_bytes_with_config", reason = "Please use `parse_bytes_rfc3339_with_config` instead." },
14+
{ path = "speedate::Date::parse_str", reason = "Please use `parse_str_rfc3339` instead." },
15+
{ path = "speedate::Date::parse_bytes", reason = "Please use `parse_bytes_rfc3339` instead." },
1116
]
1217
disallowed-types = [
1318
{ path = "num_traits::AsPrimitive", reason = "Please use `From` or `TryFrom` with `OrderedFloat` instead." },

e2e_test/batch/functions/array_concat.slt.part

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -664,17 +664,17 @@ select array_prepend(1::real, array[1]::real[]);
664664
{1,1}
665665

666666
query T
667-
select array['2020-01-02 12:34:56 -11:00'::timestamp with time zone::varchar]::timestamp[] || '2020-01-01 12:34:56'::timestamp::date;
667+
select array['2020-01-02 12:34:56 -11:00'::timestamp with time zone]::timestamp[] || '2020-01-01 12:34:56'::timestamp::date;
668668
----
669669
{"2020-01-02 23:34:56","2020-01-01 00:00:00"}
670670

671671
query T
672-
select array_append(array['2020-01-02 12:34:56 -11:00'::timestamp with time zone::varchar]::timestamp[], '2020-01-01 12:34:56'::timestamp::date);
672+
select array_append(array['2020-01-02 12:34:56 -11:00'::timestamp with time zone]::timestamp[], '2020-01-01 12:34:56'::timestamp::date);
673673
----
674674
{"2020-01-02 23:34:56","2020-01-01 00:00:00"}
675675

676676
query T
677-
select array_prepend('2020-01-01 12:34:56'::timestamp::date, array['2020-01-02 12:34:56 -11:00'::timestamp with time zone::varchar]::timestamp[]);
677+
select array_prepend('2020-01-01 12:34:56'::timestamp::date, array['2020-01-02 12:34:56 -11:00'::timestamp with time zone]::timestamp[]);
678678
----
679679
{"2020-01-01 00:00:00","2020-01-02 23:34:56"}
680680

e2e_test/batch/types/timestamptz_utc.slt.part

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,19 @@ select '2022-10-01T12:00:00Z'::timestamp with time zone;
4242
----
4343
2022-10-01 12:00:00+00:00
4444

45+
query T
46+
select '2023-11-05 01:40-07:00'::timestamptz;
47+
----
48+
2023-11-05 08:40:00+00:00
49+
50+
query T
51+
select '2023-11-05 01:40-08:00'::timestamptz;
52+
----
53+
2023-11-05 09:40:00+00:00
54+
55+
statement error
56+
select '0'::timestamptz;
57+
4558
query T
4659
select '2022-10-01 12:00:00+01:00'::timestamp with time zone BETWEEN '2022-10-01T10:59:59Z' AND '2022-10-01T11:00:01Z';
4760
----

e2e_test/source/cdc/cdc.check.slt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,3 +46,8 @@ query I
4646
select count(*) from person_rw;
4747
----
4848
3
49+
50+
query I
51+
select count(*) from tt3_rw;
52+
----
53+
2

e2e_test/source/cdc/cdc.load.slt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,38 @@ create table orders_2 (
110110
server.id = '5088'
111111
);
112112

113+
statement error
114+
create table tt3_rw (
115+
v1 int,
116+
v2 timestamp,
117+
PRIMARY KEY (v1)
118+
) with (
119+
connector = 'mysql-cdc',
120+
hostname = 'mysql',
121+
port = '3306',
122+
username = 'root',
123+
password = '123456',
124+
database.name = 'my@db',
125+
table.name = 'tt3',
126+
server.id = '5089'
127+
);
128+
129+
statement ok
130+
create table tt3_rw (
131+
v1 int,
132+
v2 timestamptz,
133+
PRIMARY KEY (v1)
134+
) with (
135+
connector = 'mysql-cdc',
136+
hostname = 'mysql',
137+
port = '3306',
138+
username = 'root',
139+
password = '123456',
140+
database.name = 'my@db',
141+
table.name = 'tt3',
142+
server.id = '5089'
143+
);
144+
113145
# Some columns missing and reordered (postgres-cdc)
114146
statement ok
115147
create table shipments_2 (

e2e_test/source/cdc/mysql_cdc.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,7 @@ VALUES (1,1,'no'),
5151

5252
CREATE USER 'dbz'@'%' IDENTIFIED BY '123456';
5353
GRANT SELECT, RELOAD, SHOW DATABASES, REPLICATION SLAVE, REPLICATION CLIENT ON *.* TO 'dbz'@'%';
54+
55+
CREATE TABLE tt3 (v1 int primary key, v2 timestamp);
56+
INSERT INTO tt3 VALUES (1, '2020-07-30 10:08:22');
57+
INSERT INTO tt3 VALUES (2, '2020-07-31 10:09:22');

java/connector-node/risingwave-connector-service/src/main/java/com/risingwave/connector/source/common/MySqlValidator.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,6 +230,8 @@ private boolean isDataTypeCompatible(String mysqlDataType, Data.DataType.TypeNam
230230
return val == Data.DataType.TypeName.DECIMAL_VALUE;
231231
case "varchar":
232232
return val == Data.DataType.TypeName.VARCHAR_VALUE;
233+
case "timestamp":
234+
return val == Data.DataType.TypeName.TIMESTAMPTZ_VALUE;
233235
default:
234236
return true; // true for other uncovered types
235237
}

src/common/src/cast/mod.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ pub fn str_to_timestamp(elem: &str) -> Result<Timestamp> {
4343

4444
#[inline]
4545
pub fn parse_naive_date(s: &str) -> Result<NaiveDate> {
46-
let res = SpeedDate::parse_str(s).map_err(|_| PARSE_ERROR_STR_TO_DATE.to_string())?;
46+
let res = SpeedDate::parse_str_rfc3339(s).map_err(|_| PARSE_ERROR_STR_TO_DATE.to_string())?;
4747
Ok(Date::from_ymd_uncheck(res.year as i32, res.month as u32, res.day as u32).0)
4848
}
4949

@@ -63,7 +63,10 @@ pub fn parse_naive_time(s: &str) -> Result<NaiveTime> {
6363

6464
#[inline]
6565
pub fn parse_naive_datetime(s: &str) -> Result<NaiveDateTime> {
66-
if let Ok(res) = SpeedDateTime::parse_str(s) {
66+
if let Ok(res) = SpeedDateTime::parse_str_rfc3339(s) {
67+
if res.time.tz_offset.is_some() {
68+
return Err(PARSE_ERROR_STR_TO_TIMESTAMP.into());
69+
}
6770
Ok(Date::from_ymd_uncheck(
6871
res.date.year as i32,
6972
res.date.month as u32,
@@ -77,7 +80,8 @@ pub fn parse_naive_datetime(s: &str) -> Result<NaiveDateTime> {
7780
)
7881
.0)
7982
} else {
80-
let res = SpeedDate::parse_str(s).map_err(|_| PARSE_ERROR_STR_TO_TIMESTAMP.to_string())?;
83+
let res = SpeedDate::parse_str_rfc3339(s)
84+
.map_err(|_| PARSE_ERROR_STR_TO_TIMESTAMP.to_string())?;
8185
Ok(
8286
Date::from_ymd_uncheck(res.year as i32, res.month as u32, res.day as u32)
8387
.and_hms_micro_uncheck(0, 0, 0, 0)
@@ -238,7 +242,7 @@ mod tests {
238242
str_to_timestamp("1999-01-08 04:02").unwrap();
239243
str_to_timestamp("1999-01-08 04:05:06").unwrap();
240244
assert_eq!(
241-
str_to_timestamp("2022-08-03T10:34:02Z").unwrap(),
245+
str_to_timestamp("2022-08-03T10:34:02").unwrap(),
242246
str_to_timestamp("2022-08-03 10:34:02").unwrap()
243247
);
244248
str_to_date("1999-01-08").unwrap();

src/common/src/types/timestamptz.rs

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ use std::io::Write;
1616
use std::str::FromStr;
1717

1818
use bytes::{Bytes, BytesMut};
19-
use chrono::{DateTime, TimeZone, Utc};
19+
use chrono::{TimeZone, Utc};
2020
use chrono_tz::Tz;
2121
use postgres_types::ToSql;
2222
use serde::{Deserialize, Serialize};
@@ -148,8 +148,32 @@ impl FromStr for Timestamptz {
148148
"Can't cast string to timestamp with time zone (expected format is YYYY-MM-DD HH:MM:SS[.D+{up to 6 digits}] followed by +hh:mm or literal Z)"
149149
, "\nFor example: '2021-04-01 00:00:00+00:00'"
150150
);
151-
let ret = s.parse::<DateTime<Utc>>().map_err(|_| ERROR_MSG)?;
152-
Ok(Timestamptz(ret.timestamp_micros()))
151+
// Try `speedate` first
152+
// * It is also used by `str_to_{date,time,timestamp}`
153+
// * It can parse without seconds `2006-01-02 15:04-07:00`
154+
let ret = match speedate::DateTime::parse_str_rfc3339(s) {
155+
Ok(r) => r,
156+
Err(_) => {
157+
// Supplement with `chrono` for existing cases:
158+
// * Extra space before offset `2006-01-02 15:04:05 -07:00`
159+
return s
160+
.parse::<chrono::DateTime<Utc>>()
161+
.map(|t| Timestamptz(t.timestamp_micros()))
162+
.map_err(|_| ERROR_MSG);
163+
}
164+
};
165+
if ret.time.tz_offset.is_none() {
166+
return Err(ERROR_MSG);
167+
}
168+
if ret.date.year < 1600 {
169+
return Err("parsing timestamptz with year < 1600 unsupported");
170+
}
171+
Ok(Timestamptz(
172+
ret.timestamp_tz()
173+
.checked_mul(1000000)
174+
.and_then(|us| us.checked_add(ret.time.microsecond.into()))
175+
.ok_or(ERROR_MSG)?,
176+
))
153177
}
154178
}
155179

src/connector/src/parser/debezium/simd_json_parser.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -298,7 +298,7 @@ mod tests {
298298
SourceColumnDesc::simple("O_DATE", DataType::Date, ColumnId::from(8)),
299299
SourceColumnDesc::simple("O_TIME", DataType::Time, ColumnId::from(9)),
300300
SourceColumnDesc::simple("O_DATETIME", DataType::Timestamp, ColumnId::from(10)),
301-
SourceColumnDesc::simple("O_TIMESTAMP", DataType::Timestamp, ColumnId::from(11)),
301+
SourceColumnDesc::simple("O_TIMESTAMP", DataType::Timestamptz, ColumnId::from(11)),
302302
SourceColumnDesc::simple("O_JSON", DataType::Jsonb, ColumnId::from(12)),
303303
]
304304
}
@@ -333,9 +333,9 @@ mod tests {
333333
assert!(row[10].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
334334
"1970-01-01T00:00:00".parse().unwrap()
335335
)))));
336-
assert!(row[11].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
337-
"1970-01-01T00:00:01".parse().unwrap()
338-
)))));
336+
assert!(row[11].eq(&Some(ScalarImpl::Timestamptz(
337+
"1970-01-01T00:00:01Z".parse().unwrap()
338+
))));
339339
assert_json_eq(&row[12], "{\"k1\": \"v1\", \"k2\": 11}");
340340
}
341341

@@ -368,9 +368,9 @@ mod tests {
368368
assert!(row[10].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
369369
"1970-01-01T00:00:00".parse().unwrap()
370370
)))));
371-
assert!(row[11].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
372-
"1970-01-01T00:00:01".parse().unwrap()
373-
)))));
371+
assert!(row[11].eq(&Some(ScalarImpl::Timestamptz(
372+
"1970-01-01T00:00:01Z".parse().unwrap()
373+
))));
374374
assert_json_eq(&row[12], "{\"k1\": \"v1\", \"k2\": 11}");
375375
}
376376

@@ -404,9 +404,9 @@ mod tests {
404404
assert!(row[10].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
405405
"5138-11-16T09:46:39".parse().unwrap()
406406
)))));
407-
assert!(row[11].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
408-
"2038-01-09T03:14:07".parse().unwrap()
409-
)))));
407+
assert!(row[11].eq(&Some(ScalarImpl::Timestamptz(
408+
"2038-01-09T03:14:07Z".parse().unwrap()
409+
))));
410410
assert_json_eq(&row[12], "{\"k1\":\"v1_updated\",\"k2\":33}");
411411
}
412412

@@ -441,9 +441,9 @@ mod tests {
441441
assert!(row[10].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
442442
"5138-11-16T09:46:39".parse().unwrap()
443443
)))));
444-
assert!(row[11].eq(&Some(ScalarImpl::Timestamp(Timestamp::new(
445-
"2038-01-09T03:14:07".parse().unwrap()
446-
)))));
444+
assert!(row[11].eq(&Some(ScalarImpl::Timestamptz(
445+
"2038-01-09T03:14:07Z".parse().unwrap()
446+
))));
447447
assert_json_eq(&row[12], "{\"k1\": \"v1_updated\", \"k2\": 33}");
448448
}
449449

src/expr/src/vector_op/timestamptz.rs

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -276,17 +276,13 @@ mod tests {
276276

277277
#[test]
278278
fn test_timestamptz_to_and_from_string() {
279-
let str1 = "0001-11-15 15:35:40.999999+08:00";
279+
let str1 = "1600-11-15 15:35:40.999999+08:00";
280280
let timestamptz1 = str_to_timestamptz(str1, "UTC").unwrap();
281-
assert_eq!(timestamptz1.timestamp_micros(), -62108094259000001);
281+
assert_eq!(timestamptz1.timestamp_micros(), -11648507059000001);
282282

283283
let mut writer = String::new();
284284
timestamptz_to_string(timestamptz1, "UTC", &mut writer).unwrap();
285-
assert_eq!(writer, "0001-11-15 07:35:40.999999+00:00");
286-
287-
let mut writer = String::new();
288-
timestamptz_to_string(timestamptz1, "UTC", &mut writer).unwrap();
289-
assert_eq!(writer, "0001-11-15 07:35:40.999999+00:00");
285+
assert_eq!(writer, "1600-11-15 07:35:40.999999+00:00");
290286

291287
let str2 = "1969-12-31 23:59:59.999999+00:00";
292288
let timestamptz2 = str_to_timestamptz(str2, "UTC").unwrap();

0 commit comments

Comments
 (0)