Skip to content

Commit eda4e2b

Browse files
authored
Adds lazy reader support for timestamps (#623)
1 parent 01354e8 commit eda4e2b

File tree

6 files changed

+589
-8
lines changed

6 files changed

+589
-8
lines changed

src/lazy/any_encoding.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -528,7 +528,7 @@ mod tests {
528528
use crate::lazy::decoder::{LazyRawReader, LazyRawSequence, LazyRawValue};
529529
use crate::lazy::raw_stream_item::RawStreamItem;
530530
use crate::lazy::raw_value_ref::RawValueRef;
531-
use crate::{IonResult, RawSymbolTokenRef};
531+
use crate::{IonResult, RawSymbolTokenRef, Timestamp};
532532

533533
#[test]
534534
fn any_encoding() -> IonResult<()> {
@@ -550,6 +550,10 @@ mod tests {
550550
reader.next()?.expect_value()?.read()?,
551551
RawValueRef::Int(5.into())
552552
);
553+
assert_eq!(
554+
reader.next()?.expect_value()?.read()?,
555+
RawValueRef::Timestamp(Timestamp::with_year(2023).with_month(8).build()?)
556+
);
553557
assert_eq!(
554558
reader.next()?.expect_value()?.read()?,
555559
RawValueRef::Bool(false)
@@ -574,6 +578,7 @@ mod tests {
574578
$4::"Gary"
575579
"foo"
576580
5
581+
2023-08T
577582
false
578583
[1, 2, 3]
579584
"#;

src/lazy/text/buffer.rs

+264-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ use std::slice::Iter;
55
use std::str::FromStr;
66

77
use nom::branch::alt;
8-
use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1};
8+
use nom::bytes::streaming::{is_a, is_not, tag, take_until, take_while1, take_while_m_n};
99
use nom::character::streaming::{char, digit1, one_of, satisfy};
1010
use nom::combinator::{fail, map, not, opt, peek, recognize, success, value};
1111
use nom::error::{ErrorKind, ParseError};
@@ -17,15 +17,16 @@ use crate::lazy::encoding::TextEncoding;
1717
use crate::lazy::raw_stream_item::RawStreamItem;
1818
use crate::lazy::text::encoded_value::EncodedTextValue;
1919
use crate::lazy::text::matched::{
20-
MatchedFloat, MatchedInt, MatchedString, MatchedSymbol, MatchedValue,
20+
MatchedFloat, MatchedHoursAndMinutes, MatchedInt, MatchedString, MatchedSymbol,
21+
MatchedTimestamp, MatchedTimestampOffset, MatchedValue,
2122
};
2223
use crate::lazy::text::parse_result::{InvalidInputError, IonParseError};
2324
use crate::lazy::text::parse_result::{IonMatchResult, IonParseResult};
2425
use crate::lazy::text::raw::r#struct::{LazyRawTextField, RawTextStructIterator};
2526
use crate::lazy::text::raw::sequence::RawTextSequenceIterator;
2627
use crate::lazy::text::value::LazyRawTextValue;
2728
use crate::result::DecodingError;
28-
use crate::{IonError, IonResult, IonType};
29+
use crate::{IonError, IonResult, IonType, TimestampPrecision};
2930

3031
impl<'a> Debug for TextBufferView<'a> {
3132
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
@@ -430,6 +431,16 @@ impl<'data> TextBufferView<'data> {
430431
EncodedTextValue::new(MatchedValue::Float(matched_float), self.offset(), length)
431432
},
432433
),
434+
map(
435+
match_and_length(Self::match_timestamp),
436+
|(matched_timestamp, length)| {
437+
EncodedTextValue::new(
438+
MatchedValue::Timestamp(matched_timestamp),
439+
self.offset(),
440+
length,
441+
)
442+
},
443+
),
433444
map(
434445
match_and_length(Self::match_string),
435446
|(matched_string, length)| {
@@ -967,6 +978,212 @@ impl<'data> TextBufferView<'data> {
967978
}
968979
Err(nom::Err::Incomplete(Needed::Unknown))
969980
}
981+
982+
/// Matches a single base-10 digit, 0-9.
/// On success, the matched ASCII digit is returned as a `char`.
983+
fn match_any_digit(self) -> IonParseResult<'data, char> {
984+
satisfy(|c| c.is_ascii_digit())(self)
985+
}
986+
987+
/// Matches a timestamp of any precision.
/// The precision-specific matchers are tried in the order listed (year first, fractional
/// seconds last); the result of the first one that succeeds is returned.
988+
pub fn match_timestamp(self) -> IonParseResult<'data, MatchedTimestamp> {
989+
alt((
990+
Self::match_timestamp_y,
991+
Self::match_timestamp_ym,
992+
Self::match_timestamp_ymd,
993+
Self::match_timestamp_ymd_hm,
994+
Self::match_timestamp_ymd_hms,
995+
Self::match_timestamp_ymd_hms_fractional,
996+
))(self)
997+
}
998+
999+
/// Matches a timestamp with year precision.
/// Requires a four-digit year followed by a literal `T` and then whatever
/// `peek_stop_character` accepts (presumably a lookahead for a value-terminating
/// character; confirm against its definition). The matched year text is discarded; only the
/// precision is recorded in the returned `MatchedTimestamp`.
1000+
fn match_timestamp_y(self) -> IonParseResult<'data, MatchedTimestamp> {
1001+
terminated(
1002+
Self::match_timestamp_year,
1003+
pair(tag("T"), Self::peek_stop_character),
1004+
)
1005+
.map(|_year| MatchedTimestamp::new(TimestampPrecision::Year))
1006+
.parse(self)
1007+
}
1008+
1009+
/// Matches a timestamp with month precision.
/// Requires a year and a month followed by a literal `T` and then whatever
/// `peek_stop_character` accepts. The matched year and month text is discarded; only the
/// precision is recorded in the returned `MatchedTimestamp`.
1010+
fn match_timestamp_ym(self) -> IonParseResult<'data, MatchedTimestamp> {
1011+
terminated(
1012+
pair(Self::match_timestamp_year, Self::match_timestamp_month),
1013+
pair(tag("T"), Self::peek_stop_character),
1014+
)
1015+
.map(|(_year, _month)| MatchedTimestamp::new(TimestampPrecision::Month))
1016+
.parse(self)
1017+
}
1018+
1019+
/// Matches a timestamp with day precision.
/// Requires a year, month, and day. Unlike the year and month precisions, the trailing `T`
/// is optional here; either way the date must be followed by whatever `peek_stop_character`
/// accepts. The matched component text is discarded; only the precision is recorded.
1020+
fn match_timestamp_ymd(self) -> IonParseResult<'data, MatchedTimestamp> {
1021+
terminated(
1022+
tuple((
1023+
Self::match_timestamp_year,
1024+
Self::match_timestamp_month,
1025+
Self::match_timestamp_day,
1026+
)),
1027+
pair(opt(tag("T")), Self::peek_stop_character),
1028+
)
1029+
.map(|_| MatchedTimestamp::new(TimestampPrecision::Day))
1030+
.parse(self)
1031+
}
1032+
1033+
/// Matches a timestamp with hour-and-minute precision.
/// Requires a full date, a `T`-prefixed `HH:MM` time, and an offset; the offset is mandatory
/// at this precision. The whole value must be followed by whatever `peek_stop_character`
/// accepts. Only the precision and the matched offset are recorded in the result.
1034+
fn match_timestamp_ymd_hm(self) -> IonParseResult<'data, MatchedTimestamp> {
1035+
terminated(
1036+
tuple((
1037+
Self::match_timestamp_year,
1038+
Self::match_timestamp_month,
1039+
Self::match_timestamp_day,
1040+
Self::match_timestamp_hour_and_minute,
1041+
Self::match_timestamp_offset,
1042+
)),
1043+
Self::peek_stop_character,
1044+
)
1045+
.map(|(_y, _m, _d, _hm, offset)| {
1046+
MatchedTimestamp::new(TimestampPrecision::HourAndMinute).with_offset(offset)
1047+
})
1048+
.parse(self)
1049+
}
1050+
1051+
/// Matches a timestamp with second precision.
/// Requires a full date, a `T`-prefixed `HH:MM` time, a `:SS` seconds component, and an
/// offset, followed by whatever `peek_stop_character` accepts. Only the precision and the
/// matched offset are recorded in the result.
1052+
fn match_timestamp_ymd_hms(self) -> IonParseResult<'data, MatchedTimestamp> {
1053+
terminated(
1054+
tuple((
1055+
Self::match_timestamp_year,
1056+
Self::match_timestamp_month,
1057+
Self::match_timestamp_day,
1058+
Self::match_timestamp_hour_and_minute,
1059+
Self::match_timestamp_seconds,
1060+
Self::match_timestamp_offset,
1061+
)),
1062+
Self::peek_stop_character,
1063+
)
1064+
.map(|(_y, _m, _d, _hm, _s, offset)| {
1065+
MatchedTimestamp::new(TimestampPrecision::Second).with_offset(offset)
1066+
})
1067+
.parse(self)
1068+
}
1069+
1070+
/// Matches a timestamp with second precision, including a fractional seconds component.
/// The fractional digits are matched but not stored: the result reports
/// `TimestampPrecision::Second` plus the matched offset, the same as the non-fractional
/// matcher. NOTE(review): presumably the fractional digits are re-read from the input when
/// the value is materialized; confirm in `MatchedTimestamp`.
1071+
fn match_timestamp_ymd_hms_fractional(self) -> IonParseResult<'data, MatchedTimestamp> {
1072+
terminated(
1073+
tuple((
1074+
Self::match_timestamp_year,
1075+
Self::match_timestamp_month,
1076+
Self::match_timestamp_day,
1077+
Self::match_timestamp_hour_and_minute,
1078+
Self::match_timestamp_seconds,
1079+
Self::match_timestamp_fractional_seconds,
1080+
Self::match_timestamp_offset,
1081+
)),
1082+
Self::peek_stop_character,
1083+
)
1084+
.map(|(_y, _m, _d, _hm, _s, _f, offset)| {
1085+
MatchedTimestamp::new(TimestampPrecision::Second).with_offset(offset)
1086+
})
1087+
.parse(self)
1088+
}
1089+
1090+
/// Matches the year component of a timestamp.
/// Consumes exactly four ASCII digits. It does not inspect what follows, so rejecting a
/// longer run of digits is left to whichever parser runs next.
1091+
fn match_timestamp_year(self) -> IonMatchResult<'data> {
1092+
recognize(take_while_m_n(4, 4, |c: u8| c.is_ascii_digit()))(self)
1093+
}
1094+
1095+
/// Matches the month component of a timestamp, including a leading `-`.
/// Accepts `01` through `12`. The `-` is consumed but is not part of the returned match,
/// which covers only the two month digits.
1096+
fn match_timestamp_month(self) -> IonMatchResult<'data> {
1097+
preceded(
1098+
tag("-"),
1099+
recognize(alt((
1100+
pair(char('0'), one_of("123456789")),
1101+
pair(char('1'), one_of("012")),
1102+
))),
1103+
)(self)
1104+
}
1105+
1106+
/// Matches the day component of a timestamp, including a leading `-`.
/// Accepts `01` through `31`. The day is not checked against the month here, so a value
/// such as `-31` matches regardless of which month preceded it. The `-` is consumed but is
/// not part of the returned match, which covers only the two day digits.
1107+
fn match_timestamp_day(self) -> IonMatchResult<'data> {
1108+
preceded(
1109+
tag("-"),
1110+
recognize(alt((
1111+
pair(char('0'), one_of("123456789")),
1112+
pair(one_of("12"), Self::match_any_digit),
1113+
pair(char('3'), one_of("01")),
1114+
))),
1115+
)(self)
1116+
}
1117+
1118+
/// Matches a leading `T`, a two-digit hour component of a timestamp, a delimiting ':', and a
1119+
/// two-digit minute component.
/// Returns the hour and minute as a pair of buffer slices; the `T` and the `:` are consumed
/// but not returned.
1120+
fn match_timestamp_hour_and_minute(
1121+
self,
1122+
) -> IonParseResult<'data, (TextBufferView<'data>, TextBufferView<'data>)> {
1123+
preceded(
1124+
tag("T"),
1125+
separated_pair(
1126+
// Hour: `00` through `23`
1127+
recognize(alt((
1128+
pair(one_of("01"), Self::match_any_digit),
1129+
pair(char('2'), one_of("0123")),
1130+
))),
1131+
// Delimiter
1132+
tag(":"),
1133+
// Minutes: `00` through `59`
1134+
recognize(pair(one_of("012345"), Self::match_any_digit)),
1135+
),
1136+
)(self)
1137+
}
1138+
1139+
/// Matches a leading `:`, and any two-digit second component from `00` to `59` inclusive.
/// The `:` is consumed but is not part of the returned match, which covers only the two
/// second digits.
1140+
fn match_timestamp_seconds(self) -> IonMatchResult<'data> {
1141+
preceded(
1142+
tag(":"),
1143+
recognize(pair(one_of("012345"), Self::match_any_digit)),
1144+
)(self)
1145+
}
1146+
1147+
/// Matches the fractional seconds component of a timestamp, including a leading `.`.
/// At least one digit must follow the `.`. The `.` is consumed but is not part of the
/// returned match, which covers only the digits.
1148+
fn match_timestamp_fractional_seconds(self) -> IonMatchResult<'data> {
1149+
preceded(tag("."), digit1)(self)
1150+
}
1151+
1152+
/// Matches a timestamp offset of any format.
/// The alternatives are tried in order:
///   * `Z` and `+00:00` both produce `MatchedTimestampOffset::Zulu`.
///   * `-00:00` produces `MatchedTimestampOffset::Unknown`.
///   * Any other `+HH:MM` or `-HH:MM` produces `MatchedTimestampOffset::HoursAndMinutes`,
///     which records the sign and the buffer offset at which the hours digits begin.
/// The literal zero offsets are listed before the general form so that they are not
/// reported as `HoursAndMinutes`.
1153+
fn match_timestamp_offset(self) -> IonParseResult<'data, MatchedTimestampOffset> {
1154+
alt((
1155+
value(MatchedTimestampOffset::Zulu, tag("Z")),
1156+
value(MatchedTimestampOffset::Zulu, tag("+00:00")),
1157+
value(MatchedTimestampOffset::Unknown, tag("-00:00")),
1158+
map(
1159+
pair(one_of("-+"), Self::match_timestamp_offset_hours_and_minutes),
1160+
|(sign, (hours, _minutes))| {
1161+
let is_negative = sign == '-';
// Only the position of the hours is kept; the matched minutes slice is dropped here.
1162+
let hours_offset = hours.offset();
1163+
MatchedTimestampOffset::HoursAndMinutes(MatchedHoursAndMinutes::new(
1164+
is_negative,
1165+
hours_offset,
1166+
))
1167+
},
1168+
),
1169+
))(self)
1170+
}
1171+
1172+
/// Matches a timestamp offset encoded as a two-digit hour, a delimiting `:`, and a two-digit
1173+
/// minute.
/// The sign is not handled here. Returns the hour and minute as a pair of buffer slices;
/// the `:` is consumed but not returned.
1174+
fn match_timestamp_offset_hours_and_minutes(self) -> IonParseResult<'data, (Self, Self)> {
1175+
separated_pair(
1176+
// Hour: `00` through `23`
1177+
recognize(alt((
1178+
pair(one_of("01"), Self::match_any_digit),
1179+
pair(char('2'), one_of("0123")),
1180+
))),
1181+
// Delimiter
1182+
tag(":"),
1183+
// Minutes: `00` through `59`
1184+
recognize(pair(one_of("012345"), Self::match_any_digit)),
1185+
)(self)
1186+
}
9701187
}
9711188

9721189
// === nom trait implementations ===
@@ -1428,6 +1645,50 @@ mod tests {
14281645
}
14291646
}
14301647

1648+
#[test]
1649+
fn test_match_timestamp() {
// Checks `TextBufferView::match_timestamp` against inputs that should match (one per
// supported precision and offset form) and inputs that should be rejected.
1650+
fn match_timestamp(input: &str) {
1651+
MatchTest::new(input).expect_match(match_length(TextBufferView::match_timestamp));
1652+
}
1653+
fn mismatch_timestamp(input: &str) {
1654+
MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_timestamp));
1655+
}
1656+
1657+
let good_inputs = &[
1658+
"2023T",
1659+
"2023-08T",
1660+
"2023-08-13", // T is optional for ymd
1661+
"2023-08-13T",
1662+
"2023-08-13T14:18Z",
1663+
"2023-08-13T14:18+05:00",
1664+
"2023-08-13T14:18-05:00",
1665+
"2023-08-13T14:18:35-05:00",
1666+
"2023-08-13T14:18:35.994-05:00",
1667+
];
1668+
for input in good_inputs {
1669+
match_timestamp(input);
1670+
}
1671+
1672+
let bad_inputs = &[
1673+
"2023", // No 'T'
1674+
"2023-08", // No 'T'
1675+
"20233T", // 5-digit year
1676+
"2023-13T", // Out of bounds month
1677+
"2023-08-41T", // Out of bounds day
1678+
"2023-08+18T", // Wrong delimiter
1679+
"2023-08-18T25:00Z", // Out of bounds hour
1680+
"2023-08-18T14:00", // No offset
1681+
"2023-08-18T14:62", // Out of bounds minute
1682+
"2023-08-18T14:35:61", // Out of bounds second
1683+
"2023-08-18T14:35:52.Z", // Dot but no fractional
1684+
"2023-08-18T14:35:52.000+24:30", // Out of bounds offset hour
1685+
"2023-08-18T14:35:52.000+00:60", // Out of bounds offset minute
1686+
];
1687+
for input in bad_inputs {
1688+
mismatch_timestamp(input);
1689+
}
1690+
}
1691+
14311692
#[test]
14321693
fn test_match_string() {
14331694
fn match_string(input: &str) {

src/lazy/text/encoded_value.rs

+1
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ impl EncodedTextValue {
126126
MatchedValue::Bool(_) => IonType::Bool,
127127
MatchedValue::Int(_) => IonType::Int,
128128
MatchedValue::Float(_) => IonType::Float,
129+
MatchedValue::Timestamp(_) => IonType::Timestamp,
129130
MatchedValue::String(_) => IonType::String,
130131
MatchedValue::Symbol(_) => IonType::Symbol,
131132
MatchedValue::List => IonType::List,

0 commit comments

Comments
 (0)