Skip to content

Commit ec91888

Browse files
authored
Incorporates pending feedback from lazy reader PRs (#642)
Feedback from PRs: #609, #614, #616, #619, #620, #627, #628, #638, #639
1 parent 700e983 commit ec91888

File tree

6 files changed

+39
-36
lines changed

6 files changed

+39
-36
lines changed

src/lazy/reader.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,11 @@ use crate::{IonError, IonResult};
1717
/// which may contain either a scalar value or a lazy container that may itself be traversed.
1818
///
1919
/// The values that the reader yields ([`LazyValue`],
20-
/// [`LazyBinarySequence`](crate::lazy::sequence::LazyBinarySequence), and
21-
/// [`LazyBinaryStruct`](crate::lazy::struct::LazyStruct)) are
22-
/// immutable references to the data stream, and remain valid until [`LazyApplicationReader::next`] is called
23-
/// again to advance the reader to the next top level value. This means that these references can
24-
/// be stored, read, and re-read as long as the reader remains on the same top-level value.
20+
/// [`LazyList`](crate::lazy::sequence::LazyList), [`LazySExp`](crate::lazy::sequence::LazySExp),
21+
/// and [`LazyStruct`](crate::lazy::struct::LazyStruct)) are immutable references to the data
22+
/// stream, and remain valid until [`LazyApplicationReader::next`] is called again to advance the
23+
/// reader to the next top-level value. This means that these references can be stored, read, and
24+
/// re-read as long as the reader remains on the same top-level value.
2525
/// ```
2626
///# use ion_rs::IonResult;
2727
///# fn main() -> IonResult<()> {

src/lazy/str_ref.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,24 @@ use std::borrow::Cow;
55
use std::fmt::{Display, Formatter};
66
use std::ops::Deref;
77

8+
/// A reference to an immutable in-memory representation of an Ion string. To get an owned [`Str`]
9+
/// instead, see [`StrRef::to_owned`].
810
#[derive(Clone, PartialEq, Debug)]
911
pub struct StrRef<'data> {
1012
text: Cow<'data, str>,
1113
}
1214

1315
impl<'data> StrRef<'data> {
1416
pub fn to_owned(&self) -> Str {
15-
Str::from(self.as_ref())
17+
Str::from(self.text.as_ref())
1618
}
1719

1820
pub fn into_owned(self) -> Str {
1921
Str::from(self)
2022
}
2123

2224
pub fn text(&self) -> &str {
23-
self.as_ref()
25+
self.text.as_ref()
2426
}
2527
}
2628

src/lazy/text/buffer.rs

+11-22
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,6 @@ pub(crate) struct TextBufferView<'a> {
113113
offset: usize,
114114
}
115115

116-
pub(crate) type ParseResult<'a, T> = IonResult<(T, TextBufferView<'a>)>;
117-
118116
impl<'data> TextBufferView<'data> {
119117
/// Constructs a new `TextBufferView` that wraps `data`, setting the view's `offset` to zero.
120118
#[inline]
@@ -432,10 +430,10 @@ impl<'data> TextBufferView<'data> {
432430
alt((
433431
// For `null` and `bool`, the matcher yields the fully parsed value because there's no additional
434432
// parsing to be done.
435-
map(match_and_length(Self::read_null), |(ion_type, length)| {
433+
map(match_and_length(Self::match_null), |(ion_type, length)| {
436434
EncodedTextValue::new(MatchedValue::Null(ion_type), self.offset(), length)
437435
}),
438-
map(match_and_length(Self::read_bool), |(value, length)| {
436+
map(match_and_length(Self::match_bool), |(value, length)| {
439437
EncodedTextValue::new(MatchedValue::Bool(value), self.offset(), length)
440438
}),
441439
// For `int` and the other types, we use `match` and store the partially-processed input in the
@@ -662,37 +660,27 @@ impl<'data> TextBufferView<'data> {
662660
Ok((remaining, matched))
663661
}
664662

665-
/// Matches a boolean value.
666-
pub fn match_bool(self) -> IonMatchResult<'data> {
667-
recognize(Self::read_bool)(self)
668-
}
669-
670663
/// Matches and returns a boolean value.
671-
pub fn read_bool(self) -> IonParseResult<'data, bool> {
664+
pub fn match_bool(self) -> IonParseResult<'data, bool> {
672665
terminated(
673666
alt((value(true, tag("true")), value(false, tag("false")))),
674667
Self::peek_stop_character,
675668
)(self)
676669
}
677670

678-
/// Matches any type of null. (`null`, `null.null`, `null.int`, etc)
679-
pub fn match_null(self) -> IonMatchResult<'data> {
680-
recognize(Self::read_null)(self)
681-
}
682-
683-
/// Matches and returns a null value.
684-
pub fn read_null(self) -> IonParseResult<'data, IonType> {
671+
/// Matches and returns any type of null. (`null`, `null.null`, `null.int`, etc)
672+
pub fn match_null(self) -> IonParseResult<'data, IonType> {
685673
delimited(
686674
complete_tag("null"),
687-
opt(preceded(complete_char('.'), Self::read_ion_type)),
675+
opt(preceded(complete_char('.'), Self::match_ion_type)),
688676
Self::peek_stop_character,
689677
)
690678
.map(|explicit_ion_type| explicit_ion_type.unwrap_or(IonType::Null))
691679
.parse(self)
692680
}
693681

694682
/// Matches and returns an Ion type.
695-
fn read_ion_type(self) -> IonParseResult<'data, IonType> {
683+
fn match_ion_type(self) -> IonParseResult<'data, IonType> {
696684
alt((
697685
value(IonType::Null, complete_tag("null")),
698686
value(IonType::Bool, complete_tag("bool")),
@@ -1929,6 +1917,7 @@ mod tests {
19291917

19301918
mismatch_ivm("ion_1_0");
19311919
mismatch_ivm("$ion__1_0");
1920+
mismatch_ivm("$ion_1_0_0");
19321921
mismatch_ivm("$$ion_1_0");
19331922
mismatch_ivm("$ion_FF_FF");
19341923
}
@@ -2199,8 +2188,6 @@ mod tests {
21992188
MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_symbol));
22002189
}
22012190

2202-
// These inputs have leading/trailing whitespace to make them more readable, but the string
2203-
// matcher doesn't accept whitespace. We'll trim each one before testing it.
22042191
let good_inputs = &[
22052192
"'hello'",
22062193
"'😀😀😀'",
@@ -2264,7 +2251,8 @@ mod tests {
22642251
MatchTest::new(input).expect_mismatch(match_length(TextBufferView::match_decimal));
22652252
}
22662253
let good_inputs = &[
2267-
"5.", "-5.", "5.0", "-5.0", "5.0d0", "-5.0d0", "5.0D0", "-5.0D0", "5.0d+1", "-5.0d-1",
2254+
"5.", "-5.", "5.0", "-5.0", "5d0", "5.d0", "5.0d0", "-5.0d0", "5.0D0", "-5.0D0",
2255+
"5.0d+1", "-5.0d-1",
22682256
];
22692257
for input in good_inputs {
22702258
match_decimal(input);
@@ -2295,6 +2283,7 @@ mod tests {
22952283
"(a b)",
22962284
"(a++)",
22972285
"(++a)",
2286+
"(a+=b)",
22982287
"(())",
22992288
"((()))",
23002289
"(1 (2 (3 4) 5) 6)",

src/lazy/text/encoded_value.rs

+3
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@ pub(crate) struct EncodedTextValue {
6363
// recognized during matching and partial information like subfield offsets can be stored here.
6464
matched_value: MatchedValue,
6565

66+
// If this value is a struct field value, this will be populated with an enum indicating
67+
// the syntax of the associated field name. If the field name is later read, the decoder
68+
// can avoid re-parsing the input from scratch.
6669
field_name_syntax: Option<MatchedFieldName>,
6770
}
6871

src/lazy/text/matched.rs

+12-7
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,7 @@ impl MatchedString {
364364
let body = matched_input.slice(3, matched_input.len() - 6);
365365
// There are no escaped characters, so we can just validate the string in-place.
366366
let mut sanitized = Vec::with_capacity(matched_input.len());
367-
decode_text_containing_escapes(
367+
replace_escapes_with_byte_values(
368368
body,
369369
&mut sanitized,
370370
// Normalize newlines
@@ -396,7 +396,7 @@ impl MatchedString {
396396
)(remaining)
397397
{
398398
remaining = remaining_after_match;
399-
decode_text_containing_escapes(
399+
replace_escapes_with_byte_values(
400400
segment_body,
401401
&mut sanitized,
402402
// Normalize newlines
@@ -430,7 +430,7 @@ impl MatchedString {
430430
// There are escaped characters. We need to build a new version of our string
431431
// that replaces the escaped characters with their corresponding bytes.
432432
let mut sanitized = Vec::with_capacity(matched_input.len());
433-
decode_text_containing_escapes(
433+
replace_escapes_with_byte_values(
434434
body,
435435
&mut sanitized,
436436
// Do not normalize newlines
@@ -443,7 +443,7 @@ impl MatchedString {
443443
}
444444
}
445445

446-
fn decode_text_containing_escapes(
446+
fn replace_escapes_with_byte_values(
447447
matched_input: TextBufferView,
448448
sanitized: &mut Vec<u8>,
449449
// If the text being escaped is in a long string or a clob, then unescaped \r\n and \r get
@@ -513,6 +513,9 @@ fn normalize_newline<'data>(
513513
}
514514
}
515515

516+
/// Matches an escape sequence at the beginning of `input` and pushes its corresponding
517+
/// byte values onto the end of `sanitized`. Returns the remaining input following the escape
518+
/// sequence.
516519
fn decode_escape_into_bytes<'data>(
517520
input: TextBufferView<'data>,
518521
sanitized: &mut Vec<u8>,
@@ -799,7 +802,7 @@ impl MatchedSymbol {
799802
// that replaces the escaped characters with their corresponding bytes.
800803
let mut sanitized = Vec::with_capacity(matched_input.len());
801804

802-
decode_text_containing_escapes(body, &mut sanitized, false, true)?;
805+
replace_escapes_with_byte_values(body, &mut sanitized, false, true)?;
803806
let text = String::from_utf8(sanitized).unwrap();
804807
Ok(RawSymbolTokenRef::Text(text.into()))
805808
}
@@ -1055,7 +1058,9 @@ impl MatchedBlob {
10551058

10561059
#[derive(Clone, Copy, Debug, PartialEq)]
10571060
pub enum MatchedClob {
1061+
// Indicates that the content of the clob was written using short-form string syntax.
10581062
Short,
1063+
// Indicates that the content of the clob was written using long-form string syntax.
10591064
Long,
10601065
}
10611066

@@ -1093,7 +1098,7 @@ impl MatchedClob {
10931098
// There are escaped characters. We need to build a new version of our string
10941099
// that replaces the escaped characters with their corresponding bytes.
10951100
let mut sanitized = Vec::with_capacity(body.len());
1096-
decode_text_containing_escapes(
1101+
replace_escapes_with_byte_values(
10971102
body,
10981103
&mut sanitized,
10991104
// Do not normalize newlines
@@ -1123,7 +1128,7 @@ impl MatchedClob {
11231128
)(remaining)
11241129
{
11251130
remaining = remaining_after_match;
1126-
decode_text_containing_escapes(
1131+
replace_escapes_with_byte_values(
11271132
segment_body,
11281133
&mut sanitized,
11291134
// Normalize newlines

src/result/decoding_error.rs

+4
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ use thiserror::Error;
77
#[error("{description}")]
88
pub struct DecodingError {
99
description: Cow<'static, str>,
10+
// This is optional because sometimes data is found to be malformed or invalid but the original
11+
// data source is not available. For example, consider a deserializer reading a symbol table
12+
// from an `Element`. If the `symbols` field is missing, it needs to raise a decoding error, but
13+
// no source position is available. Whenever possible, usages should specify the position.
1014
position: Option<Position>,
1115
}
1216

0 commit comments

Comments
 (0)