Skip to content

Commit 75b0763

Browse files
authored
Makes AnnotatableWriter and writer's symbol table experimentally pub (#889)
* Makes Writer symbol table experimentally pub
* Makes AnnotatableWriter experimentally pub
* Clippy suggestions RE: variant name prefixes
1 parent 4e0d272 commit 75b0763

File tree

11 files changed

+116
-52
lines changed

11 files changed

+116
-52
lines changed

src/element/mod.rs

+7-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ use std::cmp::Ordering;
2626
use std::fmt::{Display, Formatter};
2727
use std::io;
2828

29-
use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, Timestamp};
29+
use crate::{ion_data, Decimal, Int, IonResult, IonType, Str, Symbol, SymbolRef, Timestamp};
3030
use crate::{Blob, Bytes, Clob, List, SExp, Struct};
3131
// Re-export the Value variant types and traits so they can be accessed directly from this module.
3232
use crate::element::builders::{SequenceBuilder, StructBuilder};
@@ -251,6 +251,12 @@ impl From<Symbol> for Value {
251251
}
252252
}
253253

254+
/// Builds a [`Value::Symbol`] from a borrowed [`SymbolRef`].
///
/// The borrowed symbol is converted with `to_owned()` and the result is
/// wrapped in the `Symbol` variant.
impl From<SymbolRef<'_>> for Value {
255+
fn from(sym_val: SymbolRef<'_>) -> Self {
256+
Value::Symbol(sym_val.to_owned())
257+
}
258+
}
259+
254260
impl From<&[u8]> for Value {
255261
fn from(value: &[u8]) -> Self {
256262
Value::Blob(value.into())

src/lazy/any_encoding.rs

-1
Original file line numberDiff line numberDiff line change
@@ -726,7 +726,6 @@ pub struct LazyRawAnyValue<'top> {
726726

727727
impl<'top> LazyRawAnyValue<'top> {
728728
/// Returns an enum indicating the encoding that backs this lazy value.
729-
#[cfg(feature = "experimental-tooling-apis")]
730729
pub fn kind(&self) -> LazyRawValueKind<'top> {
731730
self.encoding
732731
}

src/lazy/encoder/binary/v1_1/value_writer.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ impl BinaryValueWriter_1_1<'_, '_> {
6161
pub fn with_inline_symbol_text(mut self) -> Self {
6262
self.value_writer_config = self
6363
.value_writer_config
64-
.with_symbol_value_encoding(SymbolValueEncoding::WriteAsInlineText);
64+
.with_symbol_value_encoding(SymbolValueEncoding::InlineText);
6565
self
6666
}
6767

src/lazy/encoder/value_writer.rs

+34
Original file line numberDiff line numberDiff line change
@@ -481,3 +481,37 @@ pub trait SequenceWriter: MakeValueWriter {
481481
Ok(self)
482482
}
483483
}
484+
485+
// Compiled only for test builds that also enable the
// `experimental-reader-writer` feature.
#[cfg(all(test, feature = "experimental-reader-writer"))]
486+
mod tests {
487+
use crate::symbol_ref::AsSymbolRef;
488+
use crate::{ion_seq, v1_0, Element, IntoAnnotatedElement, SequenceWriter, Writer};
489+
use crate::{AnnotatableWriter, IonResult, ValueWriter};
490+
/// Looks up the symbol ID for the text `name` through the value writer's
/// symbol table, writes that ID as a symbol value three times (the third
/// time also annotated with the same ID), then reads the encoded output
/// back and checks that every occurrence resolves to the text `name`.
#[test]
491+
fn save_and_reuse_symbol_id() -> IonResult<()> {
492+
let mut writer = Writer::new(v1_0::Binary, vec![])?;
493+
let name_symbol = writer
494+
.value_writer()
495+
.symbol_table()
496+
.sid_for("name")
497+
// NOTE(review): presumably `name` is always present in a fresh writer's
// symbol table, so this `unwrap()` cannot fail -- confirm.
.unwrap();
498+
writer
499+
// Write the symbol twice using its ID
500+
.write_symbol(name_symbol)?
501+
.write_symbol(name_symbol)?
502+
// Use the ID again as an annotation...
503+
.value_writer()
504+
.with_annotations(name_symbol)?
505+
// ...when writing the symbol once more.
506+
.write_symbol(name_symbol)?;
507+
// Finish writing and take the encoded bytes, then read them back as elements.
let bytes = writer.close()?;
508+
let actual = Element::read_all(&bytes)?;
509+
// The expected values are built from text, so equality shows the IDs were
// resolved back to `name` on the read side.
let expected = ion_seq!(
510+
"name".as_symbol_ref()
511+
"name".as_symbol_ref()
512+
"name".as_symbol_ref().with_annotations(["name"])
513+
);
514+
assert_eq!(actual, expected);
515+
Ok(())
516+
}
517+
}

src/lazy/encoder/value_writer_config.rs

+16-15
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,13 @@ pub enum ContainerEncoding {
3838
pub enum SymbolValueEncoding {
3939
/// Add all symbol values to the symbol table and encode them as symbol IDs.
4040
#[default]
41-
WriteAsSymbolIds,
41+
SymbolIds,
4242
/// Do not add symbol values to the symbol table; write their text inline.
4343
/// Symbol values specified as symbol IDs will not be mapped to text.
44-
WriteAsInlineText,
44+
InlineText,
4545
/// If a symbol value is already in the symbol table, encode it as a symbol ID.
4646
/// If it is not already in the symbol table, encode its text inline.
47-
WriteNewSymbolsAsInlineText,
47+
NewSymbolsAsInlineText,
4848
}
4949

5050
/// Configuration options for encoding an annotations sequence.
@@ -53,48 +53,49 @@ pub enum SymbolValueEncoding {
5353
pub enum AnnotationsEncoding {
5454
/// Add all annotations to the symbol table and encode them as symbol IDs.
5555
#[default]
56-
WriteAsSymbolIds,
56+
SymbolIds,
5757
/// Do not add annotations to the symbol table; write their text inline.
5858
/// Annotations specified as symbol IDs will not be mapped to text.
59-
WriteAsInlineText,
59+
InlineText,
6060
/// If an annotation is already in the symbol table, encode it as a symbol ID.
6161
/// If it is not already in the symbol table, encode its text inline.
62-
WriteNewSymbolsAsInlineText,
62+
NewSymbolsAsInlineText,
6363
}
6464

6565
/// Configuration options for encoding a struct field name.
6666
#[derive(Copy, Clone, PartialEq, Eq, Debug, Default)]
6767
#[non_exhaustive]
68+
#[allow(clippy::enum_variant_names)]
6869
pub enum FieldNameEncoding {
6970
/// Add all field names to the symbol table and encode them as symbol IDs.
7071
#[default]
71-
WriteAsSymbolIds,
72+
SymbolIds,
7273
/// Do not add field names to the symbol table; write their text inline.
7374
/// Field names specified as symbol IDs will not be mapped to text.
74-
WriteAsInlineText,
75+
InlineText,
7576
/// If a field name is already in the symbol table, encode it as a symbol ID.
7677
/// If it is not already in the symbol table, encode its text inline.
77-
WriteNewSymbolsAsInlineText,
78+
NewSymbolsAsInlineText,
7879
}
7980

8081
impl ValueWriterConfig {
8182
/// Constructs a `ValueWriterConfig` that writes all symbol tokens as inline text.
8283
pub const fn text() -> Self {
8384
ValueWriterConfig {
8485
container_encoding: ContainerEncoding::Delimited,
85-
symbol_value_encoding: SymbolValueEncoding::WriteAsInlineText,
86-
annotations_encoding: AnnotationsEncoding::WriteAsInlineText,
87-
field_name_encoding: FieldNameEncoding::WriteAsInlineText,
86+
symbol_value_encoding: SymbolValueEncoding::InlineText,
87+
annotations_encoding: AnnotationsEncoding::InlineText,
88+
field_name_encoding: FieldNameEncoding::InlineText,
8889
}
8990
}
9091

9192
/// Constructs a `ValueWriterConfig` that writes all symbol tokens as symbol IDs.
9293
pub const fn binary() -> Self {
9394
ValueWriterConfig {
9495
container_encoding: ContainerEncoding::LengthPrefixed,
95-
symbol_value_encoding: SymbolValueEncoding::WriteAsSymbolIds,
96-
annotations_encoding: AnnotationsEncoding::WriteAsSymbolIds,
97-
field_name_encoding: FieldNameEncoding::WriteAsSymbolIds,
96+
symbol_value_encoding: SymbolValueEncoding::SymbolIds,
97+
annotations_encoding: AnnotationsEncoding::SymbolIds,
98+
field_name_encoding: FieldNameEncoding::SymbolIds,
9899
}
99100
}
100101

src/lazy/encoder/writer.rs

+48-24
Original file line numberDiff line numberDiff line change
@@ -125,6 +125,18 @@ impl<E: Encoding, Output: Write> Writer<E, Output> {
125125
Ok(self.output)
126126
}
127127

128+
/// Returns a shared reference to the symbol table stored in this writer's
/// encoding context.
///
/// This variant is compiled, and publicly visible, only when the
/// `experimental-reader-writer` feature is enabled.
#[cfg(feature = "experimental-reader-writer")]
129+
#[inline]
130+
pub fn symbol_table(&self) -> &SymbolTable {
131+
&self.context.symbol_table
132+
}
133+
134+
/// Crate-visible variant of the same accessor, compiled when the
/// `experimental-reader-writer` feature is disabled. It has the same name
/// and body, so exactly one `symbol_table()` exists in any build.
#[cfg(not(feature = "experimental-reader-writer"))]
135+
#[inline]
136+
pub(crate) fn symbol_table(&self) -> &SymbolTable {
137+
&self.context.symbol_table
138+
}
139+
128140
/// Helper method to encode an LST append containing pending symbols.
129141
fn write_lst_append(&mut self) -> IonResult<()> {
130142
let Self {
@@ -235,9 +247,21 @@ impl<'a, V: ValueWriter> ApplicationValueWriter<'a, V> {
235247
}
236248
}
237249

238-
fn symbol_table(&mut self) -> &mut SymbolTable {
250+
fn symbol_table_mut(&mut self) -> &mut SymbolTable {
239251
&mut self.encoding.symbol_table
240252
}
253+
254+
/// Returns a shared reference to the symbol table reached through this
/// value writer's `encoding` field.
///
/// This variant is compiled, and publicly visible, only when the
/// `experimental-reader-writer` feature is enabled.
#[cfg(feature = "experimental-reader-writer")]
255+
#[inline]
256+
pub fn symbol_table(&self) -> &SymbolTable {
257+
&self.encoding.symbol_table
258+
}
259+
260+
/// Crate-visible variant of the same read-only accessor, compiled when the
/// `experimental-reader-writer` feature is disabled. It has the same name
/// and body, so exactly one `symbol_table()` exists in any build.
#[cfg(not(feature = "experimental-reader-writer"))]
261+
#[inline]
262+
pub(crate) fn symbol_table(&self) -> &SymbolTable {
263+
&self.encoding.symbol_table
264+
}
241265
}
242266

243267
impl ApplicationValueWriter<'_, BinaryValueWriter_1_1<'_, '_>> {
@@ -281,15 +305,15 @@ impl<V: ValueWriter> AnnotatableWriter for ApplicationValueWriter<'_, V> {
281305
{
282306
let mut annotations = annotations.into_annotations_vec();
283307
match self.value_writer_config.annotations_encoding() {
284-
AnnotationsEncoding::WriteAsSymbolIds => {
308+
AnnotationsEncoding::SymbolIds => {
285309
// Intern all text so everything we write is a symbol ID
286310
self.intern_all_annotations(&mut annotations)?
287311
}
288-
AnnotationsEncoding::WriteAsInlineText => {
312+
AnnotationsEncoding::InlineText => {
289313
// Validate the symbol IDs, write the text as-is
290314
self.validate_all_symbol_ids(&mut annotations)?
291315
}
292-
AnnotationsEncoding::WriteNewSymbolsAsInlineText => {
316+
AnnotationsEncoding::NewSymbolsAsInlineText => {
293317
// Map all known strings to symbol IDs, leave new text as is.
294318
self.map_known_symbols_to_symbol_ids(&mut annotations)?
295319
}
@@ -326,15 +350,15 @@ impl<V: ValueWriter> ApplicationValueWriter<'_, V> {
326350
}
327351
// The token is text...
328352
RawSymbolRef::Text(text) => {
329-
let sid = match self.symbol_table().sid_for(&text) {
353+
let sid = match self.symbol_table().sid_for(text) {
330354
Some(sid) => {
331355
//...that was already in the symbol table.
332356
sid
333357
}
334358
None => {
335359
// ...that we need to add to the symbol table.
336360
self.encoding.num_pending_symbols += 1;
337-
self.symbol_table().add_symbol_for_text(text)
361+
self.symbol_table_mut().add_symbol_for_text(text)
338362
}
339363
};
340364
*annotation = RawSymbolRef::SymbolId(sid);
@@ -389,7 +413,7 @@ impl<V: ValueWriter> ApplicationValueWriter<'_, V> {
389413
}
390414
// The token is text...
391415
RawSymbolRef::Text(text) => {
392-
match self.symbol_table().sid_for(&text) {
416+
match self.symbol_table_mut().sid_for(text) {
393417
Some(sid) => {
394418
//...that was already in the symbol table.
395419
*annotation = RawSymbolRef::SymbolId(sid);
@@ -452,9 +476,9 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> {
452476
SystemSymbol_1_1(symbol) => SystemSymbol_1_1(symbol),
453477
Text(text) => {
454478
match value_writer_config.symbol_value_encoding() {
455-
WriteAsSymbolIds => {
479+
SymbolIds => {
456480
// Map the text to a symbol ID.
457-
match encoding.symbol_table.sid_for(&text) {
481+
match encoding.symbol_table.sid_for(text) {
458482
// If it's already in the symbol table, use that SID.
459483
Some(symbol_id) => SymbolId(symbol_id),
460484
// Otherwise, add it to the symbol table.
@@ -464,15 +488,15 @@ impl<'value, V: ValueWriter> ValueWriter for ApplicationValueWriter<'value, V> {
464488
}
465489
}
466490
}
467-
WriteNewSymbolsAsInlineText => {
491+
NewSymbolsAsInlineText => {
468492
// If the text is in the symbol table, use the symbol ID. Otherwise, use the text itself.
469-
match encoding.symbol_table.sid_for(&text) {
493+
match encoding.symbol_table.sid_for(text) {
470494
Some(symbol_id) => SymbolId(symbol_id),
471495
None => Text(text),
472496
}
473497
}
474498
// We have text and we want to write text. Nothing to do.
475-
WriteAsInlineText => Text(text),
499+
InlineText => Text(text),
476500
}
477501
}
478502
};
@@ -585,18 +609,18 @@ impl<V: ValueWriter> FieldEncoder for ApplicationStructWriter<'_, V> {
585609
// From here on, we're dealing with text.
586610

587611
// If the struct writer is configured to write field names as text, do that.
588-
if self.value_writer_config.field_name_encoding() == FieldNameEncoding::WriteAsInlineText {
612+
if self.value_writer_config.field_name_encoding() == FieldNameEncoding::InlineText {
589613
return self.raw_struct_writer.encode_field_name(text);
590614
}
591615

592616
// Otherwise, see if the symbol is already in the symbol table.
593-
let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(&text) {
617+
let token: RawSymbolRef<'_> = match self.encoding.symbol_table.sid_for(text) {
594618
// If so, use the existing ID.
595619
Some(sid) => sid.into(),
596620
// If it's not but the struct writer is configured to intern new text, add it to the
597621
// symbol table.
598622
None if self.value_writer_config.field_name_encoding()
599-
== FieldNameEncoding::WriteAsSymbolIds =>
623+
== FieldNameEncoding::SymbolIds =>
600624
{
601625
self.encoding.num_pending_symbols += 1;
602626
self.encoding.symbol_table.add_symbol_for_text(text).into()
@@ -828,7 +852,7 @@ mod tests {
828852
fn intern_new_symbol_values() -> IonResult<()> {
829853
use RawSymbolRef::*;
830854
symbol_value_encoding_test(
831-
SymbolValueEncoding::WriteAsSymbolIds,
855+
SymbolValueEncoding::SymbolIds,
832856
[
833857
(Text("$ion_symbol_table"), &[0xE1, 0x03]),
834858
(Text("name"), &[0xE1, 0x04]),
@@ -842,7 +866,7 @@ mod tests {
842866
fn do_not_intern_new_symbol_values() -> IonResult<()> {
843867
use RawSymbolRef::*;
844868
symbol_value_encoding_test(
845-
SymbolValueEncoding::WriteNewSymbolsAsInlineText,
869+
SymbolValueEncoding::NewSymbolsAsInlineText,
846870
[
847871
// Known text symbols are written as SIDs
848872
(Text("$ion_symbol_table"), &[0xE1, 0x03]),
@@ -860,7 +884,7 @@ mod tests {
860884
fn encode_all_text_as_is() -> IonResult<()> {
861885
use RawSymbolRef::*;
862886
symbol_value_encoding_test(
863-
SymbolValueEncoding::WriteAsInlineText,
887+
SymbolValueEncoding::InlineText,
864888
[
865889
// Known text symbols are written as inline text
866890
(Text("name"), &[0xA4, 0x6E, 0x61, 0x6D, 0x65]),
@@ -903,7 +927,7 @@ mod tests {
903927
fn intern_new_annotations() -> IonResult<()> {
904928
use RawSymbolRef::*;
905929
annotations_sequence_encoding_test(
906-
AnnotationsEncoding::WriteAsSymbolIds,
930+
AnnotationsEncoding::SymbolIds,
907931
&[
908932
Text("$ion_symbol_table"),
909933
Text("name"),
@@ -925,7 +949,7 @@ mod tests {
925949
fn write_new_annotations_as_text() -> IonResult<()> {
926950
use RawSymbolRef::*;
927951
annotations_sequence_encoding_test(
928-
AnnotationsEncoding::WriteNewSymbolsAsInlineText,
952+
AnnotationsEncoding::NewSymbolsAsInlineText,
929953
&[
930954
Text("$ion_symbol_table"),
931955
Text("name"),
@@ -950,7 +974,7 @@ mod tests {
950974
fn write_text_annotations_as_is() -> IonResult<()> {
951975
use RawSymbolRef::*;
952976
annotations_sequence_encoding_test(
953-
AnnotationsEncoding::WriteAsInlineText,
977+
AnnotationsEncoding::InlineText,
954978
&[Text("name"), SymbolId(6), Text("foo")],
955979
&[
956980
0xE9, // Opcode: FlexUInt follows with byte length of sequence
@@ -1007,7 +1031,7 @@ mod tests {
10071031
#[test]
10081032
fn intern_all_field_names() -> IonResult<()> {
10091033
struct_field_encoding_test(
1010-
FieldNameEncoding::WriteAsSymbolIds,
1034+
FieldNameEncoding::SymbolIds,
10111035
&[
10121036
// New symbols
10131037
(RawSymbolRef::Text("foo"), &[0x81]), // FlexUInt SID $64,
@@ -1023,7 +1047,7 @@ mod tests {
10231047
#[test]
10241048
fn write_all_field_names_as_text() -> IonResult<()> {
10251049
struct_field_encoding_test(
1026-
FieldNameEncoding::WriteAsInlineText,
1050+
FieldNameEncoding::InlineText,
10271051
&[
10281052
// New symbols
10291053
(RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo"
@@ -1038,7 +1062,7 @@ mod tests {
10381062
#[test]
10391063
fn write_new_field_names_as_text() -> IonResult<()> {
10401064
struct_field_encoding_test(
1041-
FieldNameEncoding::WriteNewSymbolsAsInlineText,
1065+
FieldNameEncoding::NewSymbolsAsInlineText,
10421066
&[
10431067
// New symbols
10441068
(RawSymbolRef::Text("foo"), &[0xFB, 0x66, 0x6F, 0x6F]), // FlexSym -3, "foo"

0 commit comments

Comments
 (0)