Skip to content

Raw 1.1 writer now clears encoding buffer on flush() #871

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 97 additions & 31 deletions src/lazy/binary/raw/v1_1/e_expression.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
#![allow(non_camel_case_types)]

use std::fmt::{Debug, Formatter};
use std::ops::Range;

use crate::lazy::binary::raw::v1_1::immutable_buffer::{
ArgGrouping, ArgGroupingBitmapIterator, BinaryBuffer,
};
use crate::lazy::binary::raw::v1_1::value::DelimitedContents;
use crate::lazy::decoder::LazyRawValueExpr;
use crate::lazy::encoding::BinaryEncoding_1_1;
use crate::lazy::expanded::e_expression::EExpArgGroup;
Expand All @@ -17,6 +15,8 @@ use crate::lazy::expanded::EncodingContextRef;
use crate::lazy::text::raw::v1_1::arg_group::{EExpArg, EExpArgExpr};
use crate::lazy::text::raw::v1_1::reader::MacroIdRef;
use crate::{try_or_some_err, v1_1, Environment, HasRange, HasSpan, IonResult, Span};
use std::fmt::{Debug, Formatter};
use std::ops::Range;

/// An e-expression which has been parsed from a binary Ion 1.1 stream.
#[derive(Copy, Clone)]
Expand Down Expand Up @@ -332,24 +332,53 @@ impl<'top> Iterator for BinaryEExpArgsInputIter<'top> {
ArgGrouping::ArgGroup => {
//...then it starts with a FlexUInt that indicates whether the group is length-prefixed
// or delimited.
let (group_header_flex_uint, _remaining_args_input) =
let (group_header_flex_uint, remaining_args_input) =
try_or_some_err!(self.remaining_args_buffer.read_flex_uint());
let bytes_to_read = match group_header_flex_uint.value() {
0 => todo!("delimited argument groups"),
n_bytes => n_bytes as usize,
};
// If it's length-prefixed, we don't need to inspect its contents. We can build an
// ArgGroup using the unexamined bytes; we'll parse them later if they get evaluated.
let arg_group_length = group_header_flex_uint.size_in_bytes() + bytes_to_read;
let arg_group = BinaryEExpArgGroup::new(
parameter,
self.remaining_args_buffer.slice(0, arg_group_length),
group_header_flex_uint.size_in_bytes() as u8,
);
(
EExpArg::new(parameter, EExpArgExpr::ArgGroup(arg_group)),
self.remaining_args_buffer.consume(arg_group_length),
)
match group_header_flex_uint.value() {
// A FlexUInt of `0` indicates that the arg group is delimited.
// We need to step through it to determine its span.
0 if *parameter.encoding() == ParameterEncoding::Tagged => {
// Read the delimited sequence of tagged expressions, caching them as we go.
let (delimited_contents, remaining) =
try_or_some_err!(remaining_args_input.peek_delimited_sequence_body());
let DelimitedContents::Values(delimited_values) = delimited_contents else {
unreachable!("parser found a delimited arg group")
};
// Isolate the part of the input buffer that represented the argument group.
let matched_buffer = self
.remaining_args_buffer
.slice(0, remaining.offset() - self.remaining_args_buffer.offset());
let arg_group = BinaryEExpArgGroup::new(
parameter,
matched_buffer,
// Make a note of the leading FlexUInt header size so we can skip it later.
group_header_flex_uint.size_in_bytes() as u8,
)
.with_delimited_values(delimited_values);
(
EExpArg::new(parameter, EExpArgExpr::ArgGroup(arg_group)),
remaining,
)
}
0 => todo!("delimited argument groups for untagged encodings"),
// The arg group is length-prefixed; we don't need to inspect its contents yet.
// We can build an ArgGroup using the unexamined bytes;
// we'll parse them later if they get evaluated.
n_bytes => {
let bytes_to_read = n_bytes as usize;
let arg_group_length =
group_header_flex_uint.size_in_bytes() + bytes_to_read;
let arg_group = BinaryEExpArgGroup::new(
parameter,
self.remaining_args_buffer.slice(0, arg_group_length),
group_header_flex_uint.size_in_bytes() as u8,
);
(
EExpArg::new(parameter, EExpArgExpr::ArgGroup(arg_group)),
self.remaining_args_buffer.consume(arg_group_length),
)
}
}
}
};

Expand Down Expand Up @@ -401,6 +430,8 @@ pub struct BinaryEExpArgGroup<'top> {
parameter: &'top Parameter,
input: BinaryBuffer<'top>,
header_size: u8,
// If this is a delimited arg group, this cache will hold the expressions found during parsing.
delimited_values: Option<&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]>,
}

impl<'top> BinaryEExpArgGroup<'top> {
Expand All @@ -409,8 +440,17 @@ impl<'top> BinaryEExpArgGroup<'top> {
parameter,
input,
header_size,
delimited_values: None,
}
}

pub fn with_delimited_values(
mut self,
delimited_values: &'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>],
) -> Self {
self.delimited_values = Some(delimited_values);
self
}
}

impl HasRange for BinaryEExpArgGroup<'_> {
Expand All @@ -428,28 +468,48 @@ impl<'top> HasSpan<'top> for BinaryEExpArgGroup<'top> {
#[derive(Debug, Copy, Clone)]
pub struct BinaryEExpArgGroupIterator<'top> {
parameter: &'top Parameter,
remaining_args_buffer: BinaryBuffer<'top>,
source: BinaryEExpArgGroupIteratorSource<'top>,
}

#[derive(Debug, Copy, Clone)]
enum BinaryEExpArgGroupIteratorSource<'top> {
/// The iterator is parsing expressions from a slice of the input buffer.
Input(BinaryBuffer<'top>),
/// The iterator is iterating over the bump-allocated cache of previously parsed expressions.
Cache(&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]),
}

impl<'top> IsExhaustedIterator<'top, BinaryEncoding_1_1> for BinaryEExpArgGroupIterator<'top> {
fn is_exhausted(&self) -> bool {
self.remaining_args_buffer.is_empty()
match self.source {
BinaryEExpArgGroupIteratorSource::Input(ref input) => input.is_empty(),
BinaryEExpArgGroupIteratorSource::Cache(cache) => cache.is_empty(),
}
}
}

impl<'top> Iterator for BinaryEExpArgGroupIterator<'top> {
type Item = IonResult<LazyRawValueExpr<'top, BinaryEncoding_1_1>>;

fn next(&mut self) -> Option<Self::Item> {
if self.remaining_args_buffer.is_empty() {
return None;
match self.source {
BinaryEExpArgGroupIteratorSource::Input(ref mut input) => {
if input.is_empty() {
return None;
}
let (expr, remaining) = try_or_some_err! {
// TODO: Other encodings
input.expect_sequence_value_expr("eexp arg group subarg")
};
*input = remaining;
Some(Ok(expr))
}
BinaryEExpArgGroupIteratorSource::Cache(ref mut cache) => {
let expr = cache.first()?;
*cache = &cache[1..];
Some(Ok(*expr))
}
}
let (expr, remaining) = try_or_some_err! {
// TODO: Other encodings
self.remaining_args_buffer.expect_sequence_value_expr("eexp arg group subarg")
};
self.remaining_args_buffer = remaining;
Some(Ok(expr))
}
}

Expand All @@ -458,9 +518,15 @@ impl<'top> IntoIterator for BinaryEExpArgGroup<'top> {
type IntoIter = BinaryEExpArgGroupIterator<'top>;

fn into_iter(self) -> Self::IntoIter {
let source = match self.delimited_values {
None => BinaryEExpArgGroupIteratorSource::Input(
self.input.consume(self.header_size as usize),
),
Some(delimited_values) => BinaryEExpArgGroupIteratorSource::Cache(delimited_values),
};
BinaryEExpArgGroupIterator {
parameter: self.parameter,
remaining_args_buffer: self.input.consume(self.header_size as usize),
source,
}
}
}
Expand Down
8 changes: 7 additions & 1 deletion src/lazy/binary/raw/v1_1/immutable_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -466,7 +466,13 @@ impl<'a> BinaryBuffer<'a> {
pub(crate) fn peek_delimited_sequence(
self,
) -> IonResult<(DelimitedContents<'a>, BinaryBuffer<'a>)> {
let mut input = self.consume(1);
self.consume(1).peek_delimited_sequence_body()
}

pub(crate) fn peek_delimited_sequence_body(
self,
) -> IonResult<(DelimitedContents<'a>, BinaryBuffer<'a>)> {
let mut input = self;
let mut values =
BumpVec::<LazyRawValueExpr<'a, v1_1::Binary>>::new_in(self.context.allocator());

Expand Down
13 changes: 13 additions & 0 deletions src/lazy/binary/raw/v1_1/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -280,10 +280,23 @@ impl<'top> LazyRawValue<'top, BinaryEncoding_1_1> for &'top LazyRawBinaryValue_1
}
}

/// Nested expressions parsed and cached while reading (e.g.) a [`LazyRawBinaryValue_1_1`].
#[derive(Debug, Copy, Clone)]
pub enum DelimitedContents<'top> {
/// This value has no delimited nested data.
/// Examples:
/// * a scalar value like a string or timestamp
/// * a length-prefixed list
// This type could also be expressed as `Option<DelimitedContents<'top>>`, with its `None`
// variant removed.
None,
/// This value contains a delimited sequence of values.
/// Examples:
/// * delimited list
/// * delimited s-expression
/// * delimited argument expression group
Values(&'top [LazyRawValueExpr<'top, BinaryEncoding_1_1>]),
/// This value contains a delimited sequence of struct fields.
Fields(&'top [LazyRawFieldExpr<'top, BinaryEncoding_1_1>]),
}

Expand Down
44 changes: 34 additions & 10 deletions src/lazy/encoder/binary/v1_1/writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,15 @@ impl<W: Write> LazyRawBinaryWriter_1_1<W> {
encoding_buffer_ptr,
} = self;

let encoding_buffer = match encoding_buffer_ptr {
// If `encoding_buffer_ptr` is set, get the slice of bytes to which it refers.
Some(ptr) => unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() },
// Otherwise, there's nothing in the buffer. Use an empty slice.
None => &[],
};
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
if let Some(ptr) = encoding_buffer_ptr {
let encoding_buffer = unsafe { ptr_to_ref::<'_, BumpVec<'_, u8>>(*ptr).as_slice() };
// Write our top level encoding buffer's contents to the output sink.
output.write_all(encoding_buffer)?;
// Flush the output sink, which may have its own buffers.
output.flush()?;
}
// Now that we've written the encoding buffer's contents to output, clear it.
self.encoding_buffer_ptr = None;
Comment on lines +82 to +83
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line was missing in the original impl, resulting in a dangling pointer. The second time flush() was called, the match arm on the old line 77 would incorrectly be taken.

// Clear the allocator. A new encoding buffer will be allocated on the next write.
allocator.reset();
Ok(())
Expand Down Expand Up @@ -183,3 +182,28 @@ impl<W: Write> SequenceWriter for LazyRawBinaryWriter_1_1<W> {
Ok(self.output)
}
}

#[cfg(test)]
mod tests {
use crate::{v1_1, IonResult};

#[test]
fn flush_clears_encoding_buffer() -> IonResult<()> {
use crate::Writer;
let mut writer = Writer::new(v1_1::Binary, Vec::new()).expect("BinaryWriter::new() failed");

writer.write(42)?;
writer.flush()?;
let expected_output = writer.output().clone();
writer.flush()?;
assert_eq!(
*writer.output(),
expected_output,
"actual\n{:x?}\nwas not eq to\n{:x?}",
writer.output(),
expected_output
);

Ok(())
}
}
32 changes: 31 additions & 1 deletion src/lazy/expanded/macro_evaluator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1292,7 +1292,7 @@ mod tests {
0xEF, // System macro, address follows as 1-byte FixedUInt
0x03, // make_string
0x02, // Argument group
0x11, // FlexUInt 8
0x11, // FlexUInt 8: 8-byte group
0x93, // Opcode: 3-byte string follows
0x66, 0x6F, 0x6F, // "foo"
0x93, // Opcode: 3-byte string follows
Expand All @@ -1302,6 +1302,36 @@ mod tests {
)
}

#[test]
fn read_system_eexp_with_delimited_tagged_arg_group() -> IonResult<()> {
// Empty delimited argument group
bin_stream_eq(
&[
0xEF, // System macro, address follows as 1-byte FixedUInt
0x03, // make_string
0x02, // Argument group
0x01, // FlexUInt 0: delimited group
0xF0, // Delimited END
],
r#" "" // <-- empty string "#,
)?;
// Populated delimited argument group
bin_stream_eq(
&[
0xEF, // System macro, address follows as 1-byte FixedUInt
0x03, // make_string
0x02, // Argument group
0x01, // FlexUInt 0: delimited group
0x93, // Opcode: 3-byte string follows
0x66, 0x6F, 0x6F, // "foo"
0x93, // Opcode: 3-byte string follows
0x62, 0x61, 0x72, // "bar"
0xF0, // Delimited END
],
r#" "foobar" "#,
)
}

#[test]
fn multiple_top_level_values() -> IonResult<()> {
eval_template_invocation(
Expand Down
Loading