From bc57f4560c9676f10bd6aae4c8612be01bc2f6bf Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Mon, 10 Jun 2024 22:39:13 +0200 Subject: [PATCH 1/6] Add base support for annotated YAML objects. --- README.md | 2 + src/annotated.rs | 276 +++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 7 +- src/macros.rs | 85 +++++++++++++++ src/yaml.rs | 112 ++++--------------- 5 files changed, 390 insertions(+), 92 deletions(-) create mode 100644 src/annotated.rs create mode 100644 src/macros.rs diff --git a/README.md b/README.md index 1b417b95..c1df7820 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,8 @@ Note that `saphyr::Yaml` implements `Index<&'a str>` and `Index`: * `Index<&'a str>` assumes the container is a string to value map * otherwise, `Yaml::BadValue` is returned +Note that `annotated::YamlData` cannot return `BadValue` and will panic. + If your document does not conform to this convention (e.g. map with complex type key), you can use the `Yaml::as_XXX` family API of functions to access your objects. diff --git a/src/annotated.rs b/src/annotated.rs new file mode 100644 index 00000000..930e37ef --- /dev/null +++ b/src/annotated.rs @@ -0,0 +1,276 @@ +//! Utilities for extracting YAML with certain metadata. + +use std::ops::{Index, IndexMut}; + +use hashlink::LinkedHashMap; + +use crate::loader::parse_f64; + +/// A YAML node without annotation. See [`Yaml`], you probably want that. +/// +/// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata +/// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to +/// be used, for instance, when parsing a YAML where retrieving markers / comments is relevant. +/// +/// This definition is recursive. Each annotated node will be a structure storing the annotations +/// and the YAML data. We need to have a distinct enumeration from [`Yaml`] because the type for +/// the `Array` and `Hash` variants is dependant on that structure. 
+/// +/// If we had written [`YamlData`] as: +/// ```ignore +/// pub enum YamlData { +/// // ... +/// Array(Vec), +/// Hash(LinkedHashMap), +/// // ... +/// } +/// ``` +/// we would have stored metadata for the root node only. All subsequent nodes would be [`Yaml`], +/// which does not contain any annotation. +/// +/// Notable differences with [`Yaml`]: +/// * Indexing cannot return `BadValue` and will panic instead. +/// +/// [`Yaml`]: crate::Yaml +#[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] +pub enum YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Float types are stored as String and parsed on demand. + /// Note that `f64` does NOT implement Eq trait and can NOT be stored in `BTreeMap`. + Real(String), + /// YAML int is stored as i64. + Integer(i64), + /// YAML scalar. + String(String), + /// YAML bool, e.g. `true` or `false`. + Boolean(bool), + /// YAML array, can be accessed as a `Vec`. + Array(AnnotatedArray), + /// YAML hash, can be accessed as a `LinkedHashMap`. + /// + /// Insertion order will match the order of insertion into the map. + Hash(AnnotatedHash), + /// Alias, not fully supported yet. + Alias(usize), + /// YAML null, e.g. `null` or `~`. + Null, + /// Accessing a nonexistent node via the Index trait returns `BadValue`. This + /// simplifies error handling in the calling code. Invalid type conversion also + /// returns `BadValue`. + BadValue, +} + +/// The type contained in the [`YamlData::Array`] variant. This corresponds to YAML sequences. +#[allow(clippy::module_name_repetitions)] +pub type AnnotatedArray = Vec; +/// The type contained in the [`YamlData::Hash`] variant. This corresponds to YAML mappings. 
+#[allow(clippy::module_name_repetitions)] +pub type AnnotatedHash = LinkedHashMap; + +impl YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + define_as!(as_bool, bool, Boolean); + define_as!(as_i64, i64, Integer); + + define_as_ref!(as_hash, &AnnotatedHash, Hash); + define_as_ref!(as_str, &str, String); + define_as_ref!(as_vec, &AnnotatedArray, Array); + + define_as_mut_ref!(as_mut_hash, &mut AnnotatedHash, Hash); + define_as_mut_ref!(as_mut_vec, &mut AnnotatedArray, Array); + + define_into!(into_bool, bool, Boolean); + define_into!(into_hash, AnnotatedHash, Hash); + define_into!(into_i64, i64, Integer); + define_into!(into_string, String, String); + define_into!(into_vec, AnnotatedArray, Array); + + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn as_f64(&self) -> Option { + if let Self::Real(ref v) = self { + parse_f64(v) + } else { + None + } + } + + /// Return the `f64` value contained in this YAML node. + /// + /// If the node is not a [`YamlData::Real`] YAML node or its contents is not a valid `f64` + /// string, `None` is returned. + #[must_use] + pub fn into_f64(self) -> Option { + self.as_f64() + } + + /// If a value is null or otherwise bad (see variants), consume it and + /// replace it with a given value `other`. Otherwise, return self unchanged. + /// + /// See [`Yaml::or`] for examples. 
+ /// + /// [`Yaml::or`]: crate::Yaml::or + #[must_use] + pub fn or(self, other: Self) -> Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } + + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. + #[must_use] + pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { + match self { + Self::BadValue | Self::Null => other, + this => this, + } + } +} + +// NOTE(ethiraric, 10/06/2024): We cannot create a "generic static" variable which would act as a +// `BAD_VALUE`. This means that, unlike for `Yaml`, we have to make the indexing method panic. + +impl<'a, Node> Index<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index(&self, idx: &'a str) -> &Node { + let key = Self::String(idx.to_owned()); + match self.as_hash() { + Some(h) => h.get(&key.into()).unwrap(), + None => panic!("{idx}: key does not exist"), + } + } +} + +impl<'a, Node> IndexMut<&'a str> for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`YamlData::Hash`]. + fn index_mut(&mut self, idx: &'a str) -> &mut Node { + let key = Self::String(idx.to_owned()); + match self.as_mut_hash() { + Some(h) => h.get_mut(&key.into()).unwrap(), + None => panic!("Not a hash type"), + } + } +} + +impl Index for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Output = Node; + + /// Perform indexing if `self` is a sequence or a mapping. 
+ /// + /// # Panics + /// This function panics if the index given is out of range (as per [`Index`]). If `self` is a + /// [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. + fn index(&self, idx: usize) -> &Node { + if let Some(v) = self.as_vec() { + v.get(idx).unwrap() + } else if let Some(v) = self.as_hash() { + let key = Self::Integer(i64::try_from(idx).unwrap()); + v.get(&key.into()).unwrap() + } else { + panic!("{idx}: Index out of bounds"); + } + } +} + +impl IndexMut for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + /// Perform indexing if `self` is a sequence or a mapping. + /// + /// # Panics + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is + /// a [`YamlData::Array`], this is when the index is bigger or equal to the length of the + /// underlying `Vec`. If `self` is a [`YamlData::Hash`], this is when the mapping sequence does + /// not contain [`YamlData::Integer`]`(idx)` as a key. + /// + /// This function also panics if `self` is not a [`YamlData::Array`] nor a [`YamlData::Hash`]. 
+ fn index_mut(&mut self, idx: usize) -> &mut Node { + match self { + Self::Array(sequence) => sequence.index_mut(idx), + Self::Hash(mapping) => { + let key = Self::Integer(i64::try_from(idx).unwrap()); + mapping.get_mut(&key.into()).unwrap() + } + _ => panic!("Attempting to index but `self` is not a sequence nor a mapping"), + } + } +} + +impl IntoIterator for YamlData +where + Node: std::hash::Hash + std::cmp::Eq + From, +{ + type Item = Node; + type IntoIter = AnnotatedYamlIter; + + fn into_iter(self) -> Self::IntoIter { + Self::IntoIter { + yaml: self.into_vec().unwrap_or_default().into_iter(), + } + } +} + +/// An iterator over a [`YamlData`] node. +#[allow(clippy::module_name_repetitions)] +pub struct AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + yaml: std::vec::IntoIter, +} + +impl Iterator for AnnotatedYamlIter +where + Node: std::hash::Hash + std::cmp::Eq + From>, +{ + type Item = Node; + + fn next(&mut self) -> Option { + self.yaml.next() + } +} diff --git a/src/lib.rs b/src/lib.rs index ede027b0..43cd0b89 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,15 +43,20 @@ #![warn(missing_docs, clippy::pedantic)] +#[macro_use] +mod macros; + +mod annotated; mod char_traits; mod emitter; mod loader; mod yaml; // Re-export main components. +pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; pub use crate::emitter::YamlEmitter; pub use crate::loader::YamlLoader; -pub use crate::yaml::{Array, Hash, Yaml}; +pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] mod encoding; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 00000000..a4557367 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,85 @@ +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_as ( + ($fn_name:ident, $t:ident, $variant:ident) => ( +/// Get a copy of the inner object in the YAML enum if it is a `$t`. 
+/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with a copy of the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning references. +macro_rules! define_as_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&$t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(&self) -> Option<$t> { + match *self { + Self::$variant(ref v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `as_TYPE` methods for the [`crate::Yaml`] enum, returning mutable references. +macro_rules! define_as_mut_ref ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some(&mut $t)` with the `$t` contained. +/// Otherwise, return `None`. +#[must_use] +pub fn $fn_name(&mut self) -> Option<$t> { + match *self { + Self::$variant(ref mut v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `into_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! define_into ( + ($fn_name:ident, $t:ty, $variant:ident) => ( +/// Get the inner object in the YAML enum if it is a `$t`. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `Some($t)` with the `$t` contained. Otherwise, +/// return `None`. +#[must_use] +pub fn $fn_name(self) -> Option<$t> { + match self { + Self::$variant(v) => Some(v), + _ => None + } +} + ); +); + +/// Generate `is_TYPE` methods for the [`crate::Yaml`] enum. +macro_rules! 
define_is ( + ($fn_name:ident, $variant:pat) => ( +/// Check whether the YAML enum contains the given variant. +/// +/// # Return +/// If the variant of `self` is `Self::$variant`, return `true`. Otherwise, return `False`. +#[must_use] +pub fn $fn_name(&self) -> bool { + matches!(self, $variant) +} + ); +); diff --git a/src/yaml.rs b/src/yaml.rs index acd8f68f..5ac883fe 100644 --- a/src/yaml.rs +++ b/src/yaml.rs @@ -56,108 +56,31 @@ pub type Array = Vec; /// The type contained in the `Yaml::Hash` variant. This corresponds to YAML mappings. pub type Hash = LinkedHashMap; -macro_rules! define_as ( - ($name:ident, $t:ident, $yt:ident) => ( -/// Get a copy of the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with a copy of the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&$t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(&self) -> Option<$t> { - match *self { - Yaml::$yt(ref v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_as_mut_ref ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get a mutable reference to the inner object in the YAML enum if it is a `$t`. -/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some(&mut $t)` with the `$t` contained. -/// Otherwise, return `None`. -#[must_use] -pub fn $name(&mut self) -> Option<$t> { - match *self { - Yaml::$yt(ref mut v) => Some(v), - _ => None - } -} - ); -); - -macro_rules! define_into ( - ($name:ident, $t:ty, $yt:ident) => ( -/// Get the inner object in the YAML enum if it is a `$t`. 
-/// -/// # Return -/// If the variant of `self` is `Yaml::$yt`, return `Some($t)` with the `$t` contained. Otherwise, -/// return `None`. -#[must_use] -pub fn $name(self) -> Option<$t> { - match self { - Yaml::$yt(v) => Some(v), - _ => None - } -} - ); -); - impl Yaml { define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); - define_as_ref!(as_str, &str, String); define_as_ref!(as_hash, &Hash, Hash); + define_as_ref!(as_str, &str, String); define_as_ref!(as_vec, &Array, Array); define_as_mut_ref!(as_mut_hash, &mut Hash, Hash); define_as_mut_ref!(as_mut_vec, &mut Array, Array); define_into!(into_bool, bool, Boolean); + define_into!(into_hash, Hash, Hash); define_into!(into_i64, i64, Integer); define_into!(into_string, String, String); - define_into!(into_hash, Hash, Hash); define_into!(into_vec, Array, Array); - /// Return whether `self` is a [`Yaml::Null`] node. - #[must_use] - pub fn is_null(&self) -> bool { - matches!(*self, Yaml::Null) - } - - /// Return whether `self` is a [`Yaml::BadValue`] node. - #[must_use] - pub fn is_badvalue(&self) -> bool { - matches!(*self, Yaml::BadValue) - } - - /// Return whether `self` is a [`Yaml::Array`] node. - #[must_use] - pub fn is_array(&self) -> bool { - matches!(*self, Yaml::Array(_)) - } + define_is!(is_alias, Self::Alias(_)); + define_is!(is_array, Self::Array(_)); + define_is!(is_badvalue, Self::BadValue); + define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_integer, Self::Integer(_)); + define_is!(is_null, Self::Null); + define_is!(is_real, Self::Real(_)); + define_is!(is_string, Self::String(_)); /// Return the `f64` value contained in this YAML node. /// @@ -198,8 +121,9 @@ impl Yaml { } } - /// See `or` for behavior. This performs the same operations, but with - /// borrowed values for less linear pipelines. + /// See [`Self::or`] for behavior. + /// + /// This performs the same operations, but with borrowed values for less linear pipelines. 
#[must_use] pub fn borrowed_or<'a>(&'a self, other: &'a Self) -> &'a Self { match self { @@ -274,6 +198,12 @@ impl<'a> Index<&'a str> for Yaml { } impl<'a> IndexMut<&'a str> for Yaml { + /// Perform indexing if `self` is a mapping. + /// + /// # Panics + /// This function panics if the key given does not exist within `self` (as per [`Index`]). + /// + /// This function also panics if `self` is not a [`Yaml::Hash`]. fn index_mut(&mut self, idx: &'a str) -> &mut Yaml { let key = Yaml::String(idx.to_owned()); match self.as_mut_hash() { @@ -302,9 +232,9 @@ impl IndexMut for Yaml { /// Perform indexing if `self` is a sequence or a mapping. /// /// # Panics - /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` i + /// This function panics if the index given is out of range (as per [`IndexMut`]). If `self` is /// a [`Yaml::Array`], this is when the index is bigger or equal to the length of the - /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does no + /// underlying `Vec`. If `self` is a [`Yaml::Hash`], this is when the mapping sequence does not /// contain [`Yaml::Integer`]`(idx)` as a key. /// /// This function also panics if `self` is not a [`Yaml::Array`] nor a [`Yaml::Hash`]. From ed89f4f791de752f5ae5dc459541efe352726593 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 18:30:03 +0200 Subject: [PATCH 2/6] Prepare the ground for annotated parsing. * Make `YamlLoader` generic on the type of the `Node`. This is required because deeper node need to have annotations too. * Add a `LoadableYamlNode` trait, required for YAML node types to be loaded by `YamlLoader`. It contains methods required by `YamlLoader` during loading. * Implement `LoadableYamlNode` for `Yaml`. * Take `load_from_str` out of `YamlLoader` for parsing non-annotated nodes. This avoids every user to specify the generics in `YamlLoader::::load_from_str`. 
--- CHANGELOG.md | 9 +- examples/dump_yaml.rs | 4 +- src/emitter.rs | 11 +- src/encoding.rs | 4 +- src/lib.rs | 8 +- src/loader.rs | 236 ++++++++++++++++++++++++++------------- tests/basic.rs | 18 +-- tests/emitter.rs | 22 ++-- tests/quickcheck.rs | 4 +- tests/spec_test.rs | 4 +- tests/test_round_trip.rs | 15 +-- 11 files changed, 211 insertions(+), 124 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e3ebd2d..b3ac0657 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,14 @@ ## Upcoming -**Features** +**Breaking Changes**: +- Move `load_from_*` methods out of the `YamlLoader`. Now, `YamlLoader` gained + a generic parameter. Moving those functions out of it spares having to + manually specify the generic in `YamlLoader::::load_from_str`. + Manipulating the `YamlLoader` directly was not common. + + +**Features**: - ([#19](https://github.com/Ethiraric/yaml-rust2/pull/19)) `Yaml` now implements `IndexMut` and `IndexMut<&'a str>`. These functions may not diff --git a/examples/dump_yaml.rs b/examples/dump_yaml.rs index 8d85d7e1..34e41ee1 100644 --- a/examples/dump_yaml.rs +++ b/examples/dump_yaml.rs @@ -1,4 +1,4 @@ -use saphyr::{Yaml, YamlLoader}; +use saphyr::{load_from_str, Yaml}; use std::env; use std::fs::File; use std::io::prelude::*; @@ -36,7 +36,7 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - let docs = YamlLoader::load_from_str(&s).unwrap(); + let docs = load_from_str(&s).unwrap(); for doc in &docs { println!("---"); dump_node(doc, 0); diff --git a/src/emitter.rs b/src/emitter.rs index 19d8d4a1..8a7be40d 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -36,9 +36,9 @@ impl From for EmitError { /// The YAML serializer. 
/// /// ``` -/// # use saphyr::{YamlLoader, YamlEmitter}; +/// # use saphyr::{load_from_str, YamlEmitter}; /// let input_string = "a: b\nc: d"; -/// let yaml = YamlLoader::load_from_str(input_string).unwrap(); +/// let yaml = load_from_str(input_string).unwrap(); /// /// let mut output = String::new(); /// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); @@ -159,10 +159,10 @@ impl<'a> YamlEmitter<'a> { /// # Examples /// /// ```rust - /// use saphyr::{Yaml, YamlEmitter, YamlLoader}; + /// use saphyr::{Yaml, YamlEmitter, load_from_str}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - /// let parsed = YamlLoader::load_from_str(input).unwrap(); + /// let parsed = load_from_str(input).unwrap(); /// eprintln!("{:?}", parsed); /// /// let mut output = String::new(); @@ -410,12 +410,11 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod test { use super::YamlEmitter; - use crate::YamlLoader; #[test] fn test_multiline_string() { let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = YamlLoader::load_from_str(input).unwrap(); + let parsed = crate::load_from_str(input).unwrap(); let mut output = String::new(); let mut emitter = YamlEmitter::new(&mut output); emitter.multiline_strings(true); diff --git a/src/encoding.rs b/src/encoding.rs index 6d46dd35..17dcb69c 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -4,7 +4,7 @@ use std::{borrow::Cow, ops::ControlFlow}; use encoding_rs::{Decoder, DecoderResult, Encoding}; -use crate::{loader::LoadError, Yaml, YamlLoader}; +use crate::{loader::LoadError, Yaml}; /// The signature of the function to call when using [`YAMLDecodingTrap::Call`]. /// @@ -102,7 +102,7 @@ impl YamlDecoder { // Decode the input buffer. 
decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - YamlLoader::load_from_str(&output).map_err(LoadError::Scan) + crate::load_from_str(&output).map_err(LoadError::Scan) } } diff --git a/src/lib.rs b/src/lib.rs index 43cd0b89..140f50f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,9 +21,9 @@ //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use saphyr::{YamlLoader, YamlEmitter}; +//! use saphyr::{load_from_str, YamlEmitter}; //! -//! let docs = YamlLoader::load_from_str("[1, 2, 3]").unwrap(); +//! let docs = load_from_str("[1, 2, 3]").unwrap(); //! let doc = &docs[0]; // select the first YAML document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index //! @@ -55,7 +55,9 @@ mod yaml; // Re-export main components. pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; pub use crate::emitter::YamlEmitter; -pub use crate::loader::YamlLoader; +pub use crate::loader::{ + load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, +}; pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] diff --git a/src/loader.rs b/src/loader.rs index f2706bb9..365cb211 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -2,26 +2,84 @@ use std::collections::BTreeMap; +use hashlink::LinkedHashMap; use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; use crate::{Hash, Yaml}; +/// Load the given string as a set of YAML documents. +/// +/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only +/// if all documents are parsed successfully. An error in a latter document prevents the former +/// from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_str(source: &str) -> Result, ScanError> { + load_from_iter(source.chars()) +} + +/// Load the contents of the given iterator as a set of YAML documents. 
+/// +/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only +/// if all documents are parsed successfully. An error in a latter document prevents the former +/// from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + load_from_parser(&mut parser) +} + +/// Load the contents from the specified Parser as a set of YAML documents. +/// +/// Parsing succeeds if and only if all documents are parsed successfully. +/// An error in a latter document prevents the former from being returned. +/// # Errors +/// Returns `ScanError` when loading fails. +pub fn load_from_parser>( + parser: &mut Parser, +) -> Result, ScanError> { + let mut loader = YamlLoader::default(); + parser.load(&mut loader, true)?; + Ok(loader.docs) +} + /// Main structure for quickly parsing YAML. /// -/// See [`YamlLoader::load_from_str`]. -#[derive(Default)] +/// See [`load_from_str`]. #[allow(clippy::module_name_repetitions)] -pub struct YamlLoader { +pub struct YamlLoader +where + Node: LoadableYamlNode, +{ /// The different YAML documents that are loaded. - docs: Vec, + docs: Vec, // states // (current node, anchor_id) tuple - doc_stack: Vec<(Yaml, usize)>, - key_stack: Vec, - anchor_map: BTreeMap, + doc_stack: Vec<(Node, usize)>, + key_stack: Vec, + anchor_map: BTreeMap, +} + +// For some reason, rustc wants `Node: Default` if I `#[derive(Default)]`. 
+impl Default for YamlLoader +where + Node: LoadableYamlNode, +{ + fn default() -> Self { + Self { + docs: vec![], + doc_stack: vec![], + key_stack: vec![], + anchor_map: BTreeMap::new(), + } + } } -impl MarkedEventReceiver for YamlLoader { +impl MarkedEventReceiver for YamlLoader +where + Node: LoadableYamlNode, +{ fn on_event(&mut self, ev: Event, _: Marker) { // println!("EV {:?}", ev); match ev { @@ -31,21 +89,21 @@ impl MarkedEventReceiver for YamlLoader { Event::DocumentEnd => { match self.doc_stack.len() { // empty document - 0 => self.docs.push(Yaml::BadValue), + 0 => self.docs.push(Yaml::BadValue.into()), 1 => self.docs.push(self.doc_stack.pop().unwrap().0), _ => unreachable!(), } } Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()), aid)); + self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid)); } Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); } Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()), aid)); - self.key_stack.push(Yaml::BadValue); + self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid)); + self.key_stack.push(Yaml::BadValue.into()); } Event::MappingEnd => { self.key_stack.pop().unwrap(); @@ -91,17 +149,47 @@ impl MarkedEventReceiver for YamlLoader { Yaml::from_str(&v) }; - self.insert_new_node((node, aid)); + self.insert_new_node((node.into(), aid)); } Event::Alias(id) => { let n = match self.anchor_map.get(&id) { Some(v) => v.clone(), - None => Yaml::BadValue, + None => Yaml::BadValue.into(), }; self.insert_new_node((n, 0)); } } - // println!("DOC {:?}", self.doc_stack); + } +} + +impl YamlLoader +where + Node: LoadableYamlNode, +{ + fn insert_new_node(&mut self, node: (Node, usize)) { + // valid anchor id starts from 1 + if node.1 > 0 { + self.anchor_map.insert(node.1, node.0.clone()); + } + if self.doc_stack.is_empty() { + self.doc_stack.push(node); + } else { + let parent = self.doc_stack.last_mut().unwrap(); + let parent_node 
= &mut parent.0; + if parent_node.is_array() { + parent_node.array_mut().push(node.0); + } else if parent_node.is_hash() { + let cur_key = self.key_stack.last_mut().unwrap(); + // current node is a key + if cur_key.is_badvalue() { + *cur_key = node.0; + // current node is a value + } else { + let hash = parent_node.hash_mut(); + hash.insert(cur_key.take(), node.0); + } + } + } } } @@ -142,76 +230,70 @@ impl std::fmt::Display for LoadError { } } -impl YamlLoader { - fn insert_new_node(&mut self, node: (Yaml, usize)) { - // valid anchor id starts from 1 - if node.1 > 0 { - self.anchor_map.insert(node.1, node.0.clone()); - } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); - match *parent { - (Yaml::Array(ref mut v), _) => v.push(node.0), - (Yaml::Hash(ref mut h), _) => { - let cur_key = self.key_stack.last_mut().unwrap(); - // current node is a key - if cur_key.is_badvalue() { - *cur_key = node.0; - // current node is a value - } else { - let mut newkey = Yaml::BadValue; - std::mem::swap(&mut newkey, cur_key); - h.insert(newkey, node.0); - } - } - _ => unreachable!(), - } - } - } +/// A trait providing methods used by the [`YamlLoader`]. +/// +/// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It +/// provides the necessary methods for [`YamlLoader`] to load data into the node. +pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { + /// Return whether the YAML node is an array. + fn is_array(&self) -> bool; - /// Load the given string as a set of YAML documents. + /// Return whether the YAML node is a hash. + fn is_hash(&self) -> bool; + + /// Return whether the YAML node is `BadValue`. + fn is_badvalue(&self) -> bool; + + /// Retrieve the array variant of the YAML node. /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. 
An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_str(source: &str) -> Result, ScanError> { - Self::load_from_iter(source.chars()) - } + /// # Panics + /// This function panics if `self` is not an array. + fn array_mut(&mut self) -> &mut Vec; - /// Load the contents of the given iterator as a set of YAML documents. + /// Retrieve the hash variant of the YAML node. /// - /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only - /// if all documents are parsed successfully. An error in a latter document prevents the former - /// from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - Self::load_from_parser(&mut parser) + /// # Panics + /// This function panics if `self` is not a hash. + fn hash_mut(&mut self) -> &mut LinkedHashMap; + + /// Take the contained node out of `Self`, leaving a `BadValue` in its place. + #[must_use] + fn take(&mut self) -> Self; +} + +impl LoadableYamlNode for Yaml { + fn is_array(&self) -> bool { + matches!(self, Yaml::Array(_)) } - /// Load the contents from the specified Parser as a set of YAML documents. - /// - /// Parsing succeeds if and only if all documents are parsed successfully. - /// An error in a latter document prevents the former from being returned. - /// # Errors - /// Returns `ScanError` when loading fails. - pub fn load_from_parser>( - parser: &mut Parser, - ) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) + fn is_hash(&self) -> bool { + matches!(self, Yaml::Hash(_)) } - /// Return a reference to the parsed Yaml documents. 
- #[must_use] - pub fn documents(&self) -> &[Yaml] { - &self.docs + fn is_badvalue(&self) -> bool { + matches!(self, Yaml::BadValue) + } + + fn array_mut(&mut self) -> &mut Vec { + if let Yaml::Array(x) = self { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn hash_mut(&mut self) -> &mut LinkedHashMap { + if let Yaml::Hash(x) = self { + x + } else { + panic!("Called hash_mut on a non-hash"); + } + } + + fn take(&mut self) -> Self { + let mut taken_out = Yaml::BadValue; + std::mem::swap(&mut taken_out, self); + taken_out } } diff --git a/tests/basic.rs b/tests/basic.rs index cc00cb02..6a20c4d5 100644 --- a/tests/basic.rs +++ b/tests/basic.rs @@ -1,7 +1,7 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; #[test] fn test_api() { @@ -29,7 +29,7 @@ fn test_api() { - name: Staff damage: 3 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); @@ -50,7 +50,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); @@ -66,7 +66,7 @@ a1: &DEFAULT b2: d a2: *DEFAULT "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); } @@ -78,7 +78,7 @@ a1: &DEFAULT b1: 4 b2: *DEFAULT "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } @@ -114,7 +114,7 @@ fn test_plain_datatype() { - +12345 - [ true, false ] "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc[0].as_str().unwrap(), 
"string"); @@ -171,7 +171,7 @@ fn test_plain_datatype_with_into_methods() { - .NAN - !!float .INF "; - let mut out = YamlLoader::load_from_str(s).unwrap().into_iter(); + let mut out = load_from_str(s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ -203,7 +203,7 @@ b: ~ a: ~ c: ~ "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( @@ -229,7 +229,7 @@ fn test_integer_key() { 1: important: false "; - let out = YamlLoader::load_from_str(s).unwrap(); + let out = load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } diff --git a/tests/emitter.rs b/tests/emitter.rs index 53e558fb..142713e3 100644 --- a/tests/emitter.rs +++ b/tests/emitter.rs @@ -1,4 +1,4 @@ -use saphyr::{YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, YamlEmitter}; #[allow(clippy::similar_names)] #[test] @@ -16,7 +16,7 @@ a4: - 2 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -25,7 +25,7 @@ a4: } println!("original:\n{s}"); println!("emitted:\n{writer}"); - let docs_new = match YamlLoader::load_from_str(&writer) { + let docs_new = match load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -55,14 +55,14 @@ products: {}: empty hash key "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = match YamlLoader::load_from_str(&writer) { + let docs_new = match load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -106,7 +106,7 @@ x: test y: avoid quoting here z: 
string with spaces"#; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -164,7 +164,7 @@ null0: ~ bool0: true bool1: false"#; - let docs = YamlLoader::load_from_str(input).unwrap(); + let docs = load_from_str(input).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -212,7 +212,7 @@ e: h: []" }; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -234,7 +234,7 @@ a: - - e - f"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -258,7 +258,7 @@ a: - - f - - e"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -280,7 +280,7 @@ a: d: e: f"; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { diff --git a/tests/quickcheck.rs b/tests/quickcheck.rs index 819d0640..7c916014 100644 --- a/tests/quickcheck.rs +++ b/tests/quickcheck.rs @@ -3,7 +3,7 @@ extern crate quickcheck; use quickcheck::TestResult; -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { @@ -13,7 +13,7 @@ quickcheck! 
{ let mut emitter = YamlEmitter::new(&mut out_str); emitter.dump(&input).unwrap(); } - match YamlLoader::load_from_str(&out_str) { + match load_from_str(&out_str) { Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), Err(err) => TestResult::error(err.to_string()), } diff --git a/tests/spec_test.rs b/tests/spec_test.rs index 80b6bfd6..52a0551e 100644 --- a/tests/spec_test.rs +++ b/tests/spec_test.rs @@ -1,4 +1,4 @@ -use saphyr::{Hash, Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Hash, Yaml, YamlEmitter}; #[test] fn test_mapvec_legal() { @@ -53,5 +53,5 @@ fn test_mapvec_legal() { // - 6 // ``` - YamlLoader::load_from_str(&out_str).unwrap(); + load_from_str(&out_str).unwrap(); } diff --git a/tests/test_round_trip.rs b/tests/test_round_trip.rs index 0d03d3e6..f1b28385 100644 --- a/tests/test_round_trip.rs +++ b/tests/test_round_trip.rs @@ -1,10 +1,10 @@ -use saphyr::{Yaml, YamlEmitter, YamlLoader}; +use saphyr::{load_from_str, Yaml, YamlEmitter}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); YamlEmitter::new(&mut emitted).dump(original).unwrap(); - let documents = YamlLoader::load_from_str(&emitted).unwrap(); + let documents = load_from_str(&emitted).unwrap(); println!("emitted {emitted}"); assert_eq!(documents.len(), 1); @@ -12,12 +12,12 @@ fn roundtrip(original: &Yaml) { } fn double_roundtrip(original: &str) { - let parsed = YamlLoader::load_from_str(original).unwrap(); + let parsed = load_from_str(original).unwrap(); let mut serialized = String::new(); YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); - let reparsed = YamlLoader::load_from_str(&serialized).unwrap(); + let reparsed = load_from_str(&serialized).unwrap(); assert_eq!(parsed, reparsed); } @@ -55,15 +55,12 @@ fn test_numberlike_strings() { /// Example from #[test] fn test_issue133() { - let doc = YamlLoader::load_from_str("\"0x123\"") - .unwrap() - .pop() - .unwrap(); + let doc = load_from_str("\"0x123\"").unwrap().pop().unwrap(); 
assert_eq!(doc, Yaml::String("0x123".to_string())); let mut out_str = String::new(); YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); - let doc2 = YamlLoader::load_from_str(&out_str).unwrap().pop().unwrap(); + let doc2 = load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now } From bf82fe7b52f2383f39e49cdc0bbb210e36d18c6b Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 19:14:05 +0200 Subject: [PATCH 3/6] Update doccomments. --- src/lib.rs | 1 - src/loader.rs | 44 ++++++++++++++++++++++++++++++++++---------- 2 files changed, 34 insertions(+), 11 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 140f50f8..f41cdd49 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -30,7 +30,6 @@ //! let mut out_str = String::new(); //! let mut emitter = YamlEmitter::new(&mut out_str); //! emitter.dump(doc).unwrap(); // dump the YAML object to a String -//! //! ``` //! //! # Features diff --git a/src/loader.rs b/src/loader.rs index 365cb211..0ea0b34e 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -7,22 +7,42 @@ use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScal use crate::{Hash, Yaml}; -/// Load the given string as a set of YAML documents. +/// Load the given string as an array of YAML documents. /// /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only /// if all documents are parsed successfully. An error in a latter document prevents the former /// from being returned. +/// +/// Most often, only one document is loaded in a YAML string. In this case, only the first element +/// of the returned `Vec` will be used. 
Otherwise, each element in the `Vec` is a document: +/// +/// ``` +/// use saphyr::{load_from_str, Yaml}; +/// +/// let docs = load_from_str(r#" +/// First document +/// --- +/// - Second document +/// "#).unwrap(); +/// let first_document = &docs[0]; // Select the first YAML document +/// // The document is a string containing "First document". +/// assert_eq!(*first_document, Yaml::String("First document".to_owned())); +/// +/// let second_document = &docs[1]; // Select the second YAML document +/// // The document is an array containing a single string, "Second document". +/// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); +/// ``` +/// /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_str(source: &str) -> Result, ScanError> { load_from_iter(source.chars()) } -/// Load the contents of the given iterator as a set of YAML documents. +/// Load the contents of the given iterator as an array of YAML documents. +/// +/// See [`load_from_str`] for details. /// -/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only -/// if all documents are parsed successfully. An error in a latter document prevents the former -/// from being returned. /// # Errors /// Returns `ScanError` when loading fails. pub fn load_from_iter>(source: I) -> Result, ScanError> { @@ -30,10 +50,10 @@ pub fn load_from_iter>(source: I) -> Result, load_from_parser(&mut parser) } -/// Load the contents from the specified Parser as a set of YAML documents. +/// Load the contents from the specified Parser as an array of YAML documents. +/// +/// See [`load_from_str`] for details. /// -/// Parsing succeeds if and only if all documents are parsed successfully. -/// An error in a latter document prevents the former from being returned. /// # Errors /// Returns `ScanError` when loading fails. 
pub fn load_from_parser>( @@ -44,9 +64,13 @@ pub fn load_from_parser>( Ok(loader.docs) } -/// Main structure for quickly parsing YAML. +/// Main structure for parsing YAML. +/// +/// The `YamlLoader` may load raw YAML documents or add metadata if needed. The type of the `Node` +/// dictates what data and metadata the loader will add to the `Node`. /// -/// See [`load_from_str`]. +/// Each node must implement [`LoadableYamlNode`]. The methods are required for the loader to +/// manipulate and populate the `Node`. #[allow(clippy::module_name_repetitions)] pub struct YamlLoader where From 13c967036aa91adf1e95399bbf300f0bc2a5a548 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 22:23:05 +0200 Subject: [PATCH 4/6] Implement `LoadableYamlNode` for `MarkedYaml`. A few changes have had to be made to `LoadableYamlNode`: * The `From` requirement has been removed as it can be error-prone. It was not a direct conversion as it is unable to handle `Yaml::Hash` or `Yaml::Array` with a non-empty array/map. * Instead, `from_bare_yaml` was added, which does essentially the same as `From` but does not leak for users of the library. * `with_marker` has been added to populate the marker for the `Node`. The function is empty for `Yaml`. `load_from_*` methods have been added to `MarkedYaml` for convenience. They load YAML using the markers. The markers returned from `saphyr-parser` are not all correct, meaning that tests are kind of useless for now as they will fail due to bugs outside of the scope of this library. --- CHANGELOG.md | 8 ++ Cargo.toml | 2 +- src/annotated.rs | 9 ++- src/annotated/marked_yaml.rs | 152 +++++++++++++++++++++++++++++++++++ src/lib.rs | 6 +- src/loader.rs | 59 +++++++++++--- src/yaml.rs | 1 + 7 files changed, 222 insertions(+), 15 deletions(-) create mode 100644 src/annotated/marked_yaml.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index b3ac0657..e87aa0ad 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,14 @@ already use this. 
Users of the original `yaml-rust` crate may freely disable this feature (`cargo <...> --no-default-features`) and lower MSRV to 1.65.0. +- Load with metadata + + The `YamlLoader` now supports adding metadata alongside the nodes. For now, + the only metadata supported is the `Marker`, pointing to the position in the + input stream of the start of the node. + + This feature is extensible and should later allow adding comments. + ## v0.8.0 **Breaking Changes**: diff --git a/Cargo.toml b/Cargo.toml index 4b624194..dd5f7a26 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ encoding = [ "dep:encoding_rs" ] [dependencies] arraydeque = "0.5.1" -saphyr-parser = "0.0.1" +saphyr-parser = "0.0.2" encoding_rs = { version = "0.8.33", optional = true } hashlink = "0.8" diff --git a/src/annotated.rs b/src/annotated.rs index 930e37ef..21ba8f3a 100644 --- a/src/annotated.rs +++ b/src/annotated.rs @@ -1,12 +1,17 @@ //! Utilities for extracting YAML with certain metadata. +pub mod marked_yaml; + use std::ops::{Index, IndexMut}; use hashlink::LinkedHashMap; use crate::loader::parse_f64; -/// A YAML node without annotation. See [`Yaml`], you probably want that. +/// YAML data for nodes that will contain annotations. +/// +/// If you want a YAML node without annotations, see [`Yaml`]. +/// If you want a YAML node with annotations, see types using [`YamlData`] such as [`MarkedYaml`]. /// /// Unlike [`Yaml`] which only supports storing data, [`YamlData`] allows storing metadata /// alongside the YAML data. It is unlikely one would build it directly; it is mostly intended to @@ -32,6 +37,7 @@ use crate::loader::parse_f64; /// * Indexing cannot return `BadValue` and will panic instead. 
/// /// [`Yaml`]: crate::Yaml +/// [`MarkedYaml`]: marked_yaml::MarkedYaml #[derive(Clone, PartialEq, PartialOrd, Debug, Eq, Ord, Hash)] pub enum YamlData where @@ -93,6 +99,7 @@ where define_is!(is_array, Self::Array(_)); define_is!(is_badvalue, Self::BadValue); define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_hash, Self::Hash(_)); define_is!(is_integer, Self::Integer(_)); define_is!(is_null, Self::Null); define_is!(is_real, Self::Real(_)); diff --git a/src/annotated/marked_yaml.rs b/src/annotated/marked_yaml.rs new file mode 100644 index 00000000..1c86072f --- /dev/null +++ b/src/annotated/marked_yaml.rs @@ -0,0 +1,152 @@ +//! A YAML node with position in the source document. +//! +//! This is set aside so as to not clutter `annotated.rs`. + +use hashlink::LinkedHashMap; +use saphyr_parser::{Marker, Parser, ScanError}; + +use crate::{LoadableYamlNode, Yaml, YamlData, YamlLoader}; + +/// A YAML node with [`Marker`]s pointing to the start of the node. +/// +/// This structure does not implement functions to operate on the YAML object. To access those, +/// refer to the [`Self::data`] field. +#[derive(Clone, Debug)] +pub struct MarkedYaml { + /// The marker pointing to the start of the node. + /// + /// The marker is relative to the start of the input stream that was given to the parser, not + /// to the start of the document within the input stream. + pub marker: Marker, + /// The YAML contents of the node. + pub data: YamlData, +} + +impl MarkedYaml { + /// Load the given string as an array of YAML documents. + /// + /// See the function [`load_from_str`] for more details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_str`]: `crate::load_from_str` + pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as an array of YAML documents. + /// + /// See the function [`load_from_iter`] for more details. 
+ /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_iter`]: `crate::load_from_iter` + pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified [`Parser`] as an array of YAML documents. + /// + /// See the function [`load_from_parser`] for more details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + /// + /// [`load_from_parser`]: `crate::load_from_parser` + pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::::default(); + parser.load(&mut loader, true)?; + Ok(loader.into_documents()) + } +} + +impl PartialEq for MarkedYaml { + fn eq(&self, other: &Self) -> bool { + self.data.eq(&other.data) + } +} + +// I don't know if it's okay to implement that, but we need it for the hashmap. +impl Eq for MarkedYaml {} + +impl std::hash::Hash for MarkedYaml { + fn hash(&self, state: &mut H) { + self.data.hash(state); + } +} + +impl From> for MarkedYaml { + fn from(value: YamlData) -> Self { + Self { + marker: Marker::default(), + data: value, + } + } +} + +impl LoadableYamlNode for MarkedYaml { + fn from_bare_yaml(yaml: Yaml) -> Self { + Self { + marker: Marker::default(), + data: match yaml { + Yaml::Real(x) => YamlData::Real(x), + Yaml::Integer(x) => YamlData::Integer(x), + Yaml::String(x) => YamlData::String(x), + Yaml::Boolean(x) => YamlData::Boolean(x), + // Array and Hash will always have their container empty. 
+ Yaml::Array(_) => YamlData::Array(vec![]), + Yaml::Hash(_) => YamlData::Hash(LinkedHashMap::new()), + Yaml::Alias(x) => YamlData::Alias(x), + Yaml::Null => YamlData::Null, + Yaml::BadValue => YamlData::BadValue, + }, + } + } + + fn is_array(&self) -> bool { + self.data.is_array() + } + + fn is_hash(&self) -> bool { + self.data.is_hash() + } + + fn is_badvalue(&self) -> bool { + self.data.is_badvalue() + } + + fn array_mut(&mut self) -> &mut Vec { + if let YamlData::Array(x) = &mut self.data { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn hash_mut(&mut self) -> &mut LinkedHashMap { + if let YamlData::Hash(x) = &mut self.data { + x + } else { + panic!("Called array_mut on a non-array"); + } + } + + fn take(&mut self) -> Self { + let mut taken_out = MarkedYaml { + marker: Marker::default(), + data: YamlData::BadValue, + }; + std::mem::swap(&mut taken_out, self); + taken_out + } + + fn with_marker(mut self, marker: Marker) -> Self { + self.marker = marker; + self + } +} diff --git a/src/lib.rs b/src/lib.rs index f41cdd49..1f431f38 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,7 +52,9 @@ mod loader; mod yaml; // Re-export main components. -pub use crate::annotated::{AnnotatedArray, AnnotatedHash, YamlData}; +pub use crate::annotated::{ + marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData, +}; pub use crate::emitter::YamlEmitter; pub use crate::loader::{ load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, @@ -67,3 +69,5 @@ pub use crate::encoding::{YAMLDecodingTrap, YAMLDecodingTrapFn, YamlDecoder}; // Re-export `ScanError` as it is used as part of our public API and we want consumers to be able // to inspect it (e.g. perform a `match`). They wouldn't be able without it. pub use saphyr_parser::ScanError; +// Re-export [`Marker`] which is used for annotated YAMLs. 
+pub use saphyr_parser::Marker; diff --git a/src/loader.rs b/src/loader.rs index 0ea0b34e..d6ececdf 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -50,7 +50,7 @@ pub fn load_from_iter>(source: I) -> Result, load_from_parser(&mut parser) } -/// Load the contents from the specified Parser as an array of YAML documents. +/// Load the contents from the specified [`Parser`] as an array of YAML documents. /// /// See [`load_from_str`] for details. /// @@ -104,8 +104,7 @@ impl MarkedEventReceiver for YamlLoader where Node: LoadableYamlNode, { - fn on_event(&mut self, ev: Event, _: Marker) { - // println!("EV {:?}", ev); + fn on_event(&mut self, ev: Event, marker: Marker) { match ev { Event::DocumentStart | Event::Nothing | Event::StreamStart | Event::StreamEnd => { // do nothing @@ -113,21 +112,29 @@ where Event::DocumentEnd => { match self.doc_stack.len() { // empty document - 0 => self.docs.push(Yaml::BadValue.into()), + 0 => self + .docs + .push(Node::from_bare_yaml(Yaml::BadValue).with_marker(marker)), 1 => self.docs.push(self.doc_stack.pop().unwrap().0), _ => unreachable!(), } } Event::SequenceStart(aid, _) => { - self.doc_stack.push((Yaml::Array(Vec::new()).into(), aid)); + self.doc_stack.push(( + Node::from_bare_yaml(Yaml::Array(Vec::new())).with_marker(marker), + aid, + )); } Event::SequenceEnd => { let node = self.doc_stack.pop().unwrap(); self.insert_new_node(node); } Event::MappingStart(aid, _) => { - self.doc_stack.push((Yaml::Hash(Hash::new()).into(), aid)); - self.key_stack.push(Yaml::BadValue.into()); + self.doc_stack.push(( + Node::from_bare_yaml(Yaml::Hash(Hash::new())).with_marker(marker), + aid, + )); + self.key_stack.push(Node::from_bare_yaml(Yaml::BadValue)); } Event::MappingEnd => { self.key_stack.pop().unwrap(); @@ -172,15 +179,14 @@ where // Datatype is not specified, or unrecognized Yaml::from_str(&v) }; - - self.insert_new_node((node.into(), aid)); + self.insert_new_node((Node::from_bare_yaml(node).with_marker(marker), aid)); } 
Event::Alias(id) => { let n = match self.anchor_map.get(&id) { Some(v) => v.clone(), - None => Yaml::BadValue.into(), + None => Node::from_bare_yaml(Yaml::BadValue), }; - self.insert_new_node((n, 0)); + self.insert_new_node((n.with_marker(marker), 0)); } } } @@ -215,6 +221,12 @@ where } } } + + /// Return the document nodes from `self`, consuming it in the process. + #[must_use] + pub fn into_documents(self) -> Vec { + self.docs + } } /// An error that happened when loading a YAML document. @@ -258,7 +270,19 @@ impl std::fmt::Display for LoadError { /// /// This trait must be implemented on YAML node types (i.e.: [`Yaml`] and annotated YAML nodes). It /// provides the necessary methods for [`YamlLoader`] to load data into the node. -pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { +pub trait LoadableYamlNode: Clone + std::hash::Hash + Eq { + /// Create an instance of `Self` from a [`Yaml`]. + /// + /// Nodes must implement this to be built. The optional metadata that they contain will be + /// later provided by the loader and can be default initialized. The [`Yaml`] object passed as + /// parameter may be of the [`Array`] or [`Hash`] variants. In this event, the inner container + /// will always be empty. There is no need to traverse all elements to convert them from + /// [`Yaml`] to `Self`. + /// + /// [`Array`]: `Yaml::Array` + /// [`Hash`]: `Yaml::Hash` + fn from_bare_yaml(yaml: Yaml) -> Self; + /// Return whether the YAML node is an array. fn is_array(&self) -> bool; @@ -283,9 +307,20 @@ pub trait LoadableYamlNode: From + Clone + std::hash::Hash + Eq { /// Take the contained node out of `Self`, leaving a `BadValue` in its place. #[must_use] fn take(&mut self) -> Self; + + /// Provide the marker for the node (builder-style). 
+ #[inline] + #[must_use] + fn with_marker(self, _: Marker) -> Self { + self + } } impl LoadableYamlNode for Yaml { + fn from_bare_yaml(yaml: Yaml) -> Self { + yaml + } + fn is_array(&self) -> bool { matches!(self, Yaml::Array(_)) } diff --git a/src/yaml.rs b/src/yaml.rs index 5ac883fe..f15ba003 100644 --- a/src/yaml.rs +++ b/src/yaml.rs @@ -77,6 +77,7 @@ impl Yaml { define_is!(is_array, Self::Array(_)); define_is!(is_badvalue, Self::BadValue); define_is!(is_boolean, Self::Boolean(_)); + define_is!(is_hash, Self::Hash(_)); define_is!(is_integer, Self::Integer(_)); define_is!(is_null, Self::Null); define_is!(is_real, Self::Real(_)); From 796a15686d56658c7075dc74b5af55c0061dc7b1 Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Thu, 13 Jun 2024 22:37:56 +0200 Subject: [PATCH 5/6] Move `load_from_*` functions into `Yaml`. This would make more sense in user code: ```rs Yaml::load_from_str("foo"); // Explicit that we're parsing YAML load_from_str("foo"); // Too implicit, too generic, may be from another lib ``` Plus, this mirrors `MarkedYaml`'s behavior. 
--- README.md | 4 +-- examples/dump_yaml.rs | 4 +-- src/annotated/marked_yaml.rs | 10 +++--- src/emitter.rs | 12 +++++--- src/encoding.rs | 2 +- src/lib.rs | 8 ++--- src/loader.rs | 59 +---------------------------------- src/yaml.rs | 60 +++++++++++++++++++++++++++++++++++- tests/basic.rs | 18 +++++------ tests/emitter.rs | 22 ++++++------- tests/quickcheck.rs | 4 +-- tests/spec_test.rs | 4 +-- tests/test_round_trip.rs | 12 ++++---- 13 files changed, 110 insertions(+), 109 deletions(-) diff --git a/README.md b/README.md index c1df7820..498edc91 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ cargo add saphyr Use `saphyr::YamlLoader` to load YAML documents and access them as `Yaml` objects: ```rust -use saphyr::{YamlLoader, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; fn main() { let s = @@ -40,7 +40,7 @@ bar: - 1 - 2.0 "; - let docs = YamlLoader::load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); // Multi document support, doc is a yaml::Yaml let doc = &docs[0]; diff --git a/examples/dump_yaml.rs b/examples/dump_yaml.rs index 34e41ee1..8641732a 100644 --- a/examples/dump_yaml.rs +++ b/examples/dump_yaml.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, Yaml}; +use saphyr::Yaml; use std::env; use std::fs::File; use std::io::prelude::*; @@ -36,7 +36,7 @@ fn main() { let mut s = String::new(); f.read_to_string(&mut s).unwrap(); - let docs = load_from_str(&s).unwrap(); + let docs = Yaml::load_from_str(&s).unwrap(); for doc in &docs { println!("---"); dump_node(doc, 0); diff --git a/src/annotated/marked_yaml.rs b/src/annotated/marked_yaml.rs index 1c86072f..dcc73dcf 100644 --- a/src/annotated/marked_yaml.rs +++ b/src/annotated/marked_yaml.rs @@ -30,19 +30,19 @@ impl MarkedYaml { /// # Errors /// Returns `ScanError` when loading fails. 
/// - /// [`load_from_str`]: `crate::load_from_str` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_str(source: &str) -> Result, ScanError> { Self::load_from_iter(source.chars()) } /// Load the contents of the given iterator as an array of YAML documents. /// - /// See the function [`load_from_iter`] for more details. + /// See the function [`load_from_str`] for more details. /// /// # Errors /// Returns `ScanError` when loading fails. /// - /// [`load_from_iter`]: `crate::load_from_iter` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_iter>(source: I) -> Result, ScanError> { let mut parser = Parser::new(source); Self::load_from_parser(&mut parser) @@ -50,12 +50,12 @@ impl MarkedYaml { /// Load the contents from the specified [`Parser`] as an array of YAML documents. /// - /// See the function [`load_from_parser`] for more details. + /// See the function [`load_from_str`] for more details. /// /// # Errors /// Returns `ScanError` when loading fails. /// - /// [`load_from_parser`]: `crate::load_from_parser` + /// [`load_from_str`]: `Yaml::load_from_str` pub fn load_from_parser>( parser: &mut Parser, ) -> Result, ScanError> { diff --git a/src/emitter.rs b/src/emitter.rs index 8a7be40d..3653f564 100644 --- a/src/emitter.rs +++ b/src/emitter.rs @@ -36,9 +36,9 @@ impl From for EmitError { /// The YAML serializer. 
/// /// ``` -/// # use saphyr::{load_from_str, YamlEmitter}; +/// # use saphyr::{Yaml, YamlEmitter}; /// let input_string = "a: b\nc: d"; -/// let yaml = load_from_str(input_string).unwrap(); +/// let yaml = Yaml::load_from_str(input_string).unwrap(); /// /// let mut output = String::new(); /// YamlEmitter::new(&mut output).dump(&yaml[0]).unwrap(); @@ -159,10 +159,10 @@ impl<'a> YamlEmitter<'a> { /// # Examples /// /// ```rust - /// use saphyr::{Yaml, YamlEmitter, load_from_str}; + /// use saphyr::{Yaml, YamlEmitter}; /// /// let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - /// let parsed = load_from_str(input).unwrap(); + /// let parsed = Yaml::load_from_str(input).unwrap(); /// eprintln!("{:?}", parsed); /// /// let mut output = String::new(); @@ -409,12 +409,14 @@ fn need_quotes(string: &str) -> bool { #[cfg(test)] mod test { + use crate::Yaml; + use super::YamlEmitter; #[test] fn test_multiline_string() { let input = r#"{foo: "bar!\nbar!", baz: 42}"#; - let parsed = crate::load_from_str(input).unwrap(); + let parsed = Yaml::load_from_str(input).unwrap(); let mut output = String::new(); let mut emitter = YamlEmitter::new(&mut output); emitter.multiline_strings(true); diff --git a/src/encoding.rs b/src/encoding.rs index 17dcb69c..b5e3cd3a 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -102,7 +102,7 @@ impl YamlDecoder { // Decode the input buffer. decode_loop(&buffer, &mut output, &mut decoder, self.trap)?; - crate::load_from_str(&output).map_err(LoadError::Scan) + Yaml::load_from_str(&output).map_err(LoadError::Scan) } } diff --git a/src/lib.rs b/src/lib.rs index 1f431f38..53093125 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,9 +21,9 @@ //! Parse a string into `Vec` and then serialize it as a YAML string. //! //! ``` -//! use saphyr::{load_from_str, YamlEmitter}; +//! use saphyr::{Yaml, YamlEmitter}; //! -//! let docs = load_from_str("[1, 2, 3]").unwrap(); +//! let docs = Yaml::load_from_str("[1, 2, 3]").unwrap(); //! 
let doc = &docs[0]; // select the first YAML document //! assert_eq!(doc[0].as_i64().unwrap(), 1); // access elements by index //! @@ -56,9 +56,7 @@ pub use crate::annotated::{ marked_yaml::MarkedYaml, AnnotatedArray, AnnotatedHash, AnnotatedYamlIter, YamlData, }; pub use crate::emitter::YamlEmitter; -pub use crate::loader::{ - load_from_iter, load_from_parser, load_from_str, LoadableYamlNode, YamlLoader, -}; +pub use crate::loader::{LoadableYamlNode, YamlLoader}; pub use crate::yaml::{Array, Hash, Yaml, YamlIter}; #[cfg(feature = "encoding")] diff --git a/src/loader.rs b/src/loader.rs index d6ececdf..188512ac 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -3,67 +3,10 @@ use std::collections::BTreeMap; use hashlink::LinkedHashMap; -use saphyr_parser::{Event, MarkedEventReceiver, Marker, Parser, ScanError, TScalarStyle, Tag}; +use saphyr_parser::{Event, MarkedEventReceiver, Marker, ScanError, TScalarStyle, Tag}; use crate::{Hash, Yaml}; -/// Load the given string as an array of YAML documents. -/// -/// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only -/// if all documents are parsed successfully. An error in a latter document prevents the former -/// from being returned. -/// -/// Most often, only one document is loaded in a YAML string. In this case, only the first element -/// of the returned `Vec` will be used. Otherwise, each element in the `Vec` is a document: -/// -/// ``` -/// use saphyr::{load_from_str, Yaml}; -/// -/// let docs = load_from_str(r#" -/// First document -/// --- -/// - Second document -/// "#).unwrap(); -/// let first_document = &docs[0]; // Select the first YAML document -/// // The document is a string containing "First document". -/// assert_eq!(*first_document, Yaml::String("First document".to_owned())); -/// -/// let second_document = &docs[1]; // Select the second YAML document -/// // The document is an array containing a single string, "Second document". 
-/// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); -/// ``` -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_str(source: &str) -> Result, ScanError> { - load_from_iter(source.chars()) -} - -/// Load the contents of the given iterator as an array of YAML documents. -/// -/// See [`load_from_str`] for details. -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_iter>(source: I) -> Result, ScanError> { - let mut parser = Parser::new(source); - load_from_parser(&mut parser) -} - -/// Load the contents from the specified [`Parser`] as an array of YAML documents. -/// -/// See [`load_from_str`] for details. -/// -/// # Errors -/// Returns `ScanError` when loading fails. -pub fn load_from_parser>( - parser: &mut Parser, -) -> Result, ScanError> { - let mut loader = YamlLoader::default(); - parser.load(&mut loader, true)?; - Ok(loader.docs) -} - /// Main structure for parsing YAML. /// /// The `YamlLoader` may load raw YAML documents or add metadata if needed. The type of the `Node` diff --git a/src/yaml.rs b/src/yaml.rs index f15ba003..581d5d75 100644 --- a/src/yaml.rs +++ b/src/yaml.rs @@ -5,8 +5,9 @@ use std::{convert::TryFrom, ops::Index, ops::IndexMut}; use hashlink::LinkedHashMap; +use saphyr_parser::{Parser, ScanError}; -use crate::loader::parse_f64; +use crate::{loader::parse_f64, YamlLoader}; /// A YAML node is stored as this `Yaml` enumeration, which provides an easy way to /// access your YAML document. @@ -57,6 +58,63 @@ pub type Array = Vec; pub type Hash = LinkedHashMap; impl Yaml { + /// Load the given string as an array of YAML documents. + /// + /// The `source` is interpreted as YAML documents and is parsed. Parsing succeeds if and only + /// if all documents are parsed successfully. An error in a latter document prevents the former + /// from being returned. + /// + /// Most often, only one document is loaded in a YAML string. 
In this case, only the first element + /// of the returned `Vec` will be used. Otherwise, each element in the `Vec` is a document: + /// + /// ``` + /// use saphyr::Yaml; + /// + /// let docs = Yaml::load_from_str(r#" + /// First document + /// --- + /// - Second document + /// "#).unwrap(); + /// let first_document = &docs[0]; // Select the first YAML document + /// // The document is a string containing "First document". + /// assert_eq!(*first_document, Yaml::String("First document".to_owned())); + /// + /// let second_document = &docs[1]; // Select the second YAML document + /// // The document is an array containing a single string, "Second document". + /// assert_eq!(second_document[0], Yaml::String("Second document".to_owned())); + /// ``` + /// + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_str(source: &str) -> Result, ScanError> { + Self::load_from_iter(source.chars()) + } + + /// Load the contents of the given iterator as an array of YAML documents. + /// + /// See [`Self::load_from_str`] for details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. + pub fn load_from_iter>(source: I) -> Result, ScanError> { + let mut parser = Parser::new(source); + Self::load_from_parser(&mut parser) + } + + /// Load the contents from the specified [`Parser`] as an array of YAML documents. + /// + /// See [`Self::load_from_str`] for details. + /// + /// # Errors + /// Returns `ScanError` when loading fails. 
+ pub fn load_from_parser>( + parser: &mut Parser, + ) -> Result, ScanError> { + let mut loader = YamlLoader::default(); + parser.load(&mut loader, true)?; + Ok(loader.into_documents()) + } + define_as!(as_bool, bool, Boolean); define_as!(as_i64, i64, Integer); diff --git a/tests/basic.rs b/tests/basic.rs index 6a20c4d5..7a97c771 100644 --- a/tests/basic.rs +++ b/tests/basic.rs @@ -1,7 +1,7 @@ #![allow(clippy::bool_assert_comparison)] #![allow(clippy::float_cmp)] -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; #[test] fn test_api() { @@ -29,7 +29,7 @@ fn test_api() { - name: Staff damage: 3 "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; assert_eq!(doc[0]["name"].as_str().unwrap(), "Ogre"); @@ -50,7 +50,7 @@ a: 1 b: 2.2 c: [1, 2] "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a"].as_i64().unwrap(), 1i64); assert_eq!(doc["b"].as_f64().unwrap(), 2.2f64); @@ -66,7 +66,7 @@ a1: &DEFAULT b2: d a2: *DEFAULT "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a2"]["b1"].as_i64().unwrap(), 4); } @@ -78,7 +78,7 @@ a1: &DEFAULT b1: 4 b2: *DEFAULT "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc["a1"]["b2"], Yaml::BadValue); } @@ -114,7 +114,7 @@ fn test_plain_datatype() { - +12345 - [ true, false ] "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let doc = &out[0]; assert_eq!(doc[0].as_str().unwrap(), "string"); @@ -171,7 +171,7 @@ fn test_plain_datatype_with_into_methods() { - .NAN - !!float .INF "; - let mut out = load_from_str(s).unwrap().into_iter(); + let mut out = Yaml::load_from_str(s).unwrap().into_iter(); let mut doc = out.next().unwrap().into_iter(); assert_eq!(doc.next().unwrap().into_string().unwrap(), "string"); @@ 
-203,7 +203,7 @@ b: ~ a: ~ c: ~ "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); let mut iter = first.into_hash().unwrap().into_iter(); assert_eq!( @@ -229,7 +229,7 @@ fn test_integer_key() { 1: important: false "; - let out = load_from_str(s).unwrap(); + let out = Yaml::load_from_str(s).unwrap(); let first = out.into_iter().next().unwrap(); assert_eq!(first[0]["important"].as_bool().unwrap(), true); } diff --git a/tests/emitter.rs b/tests/emitter.rs index 142713e3..fdf1acbd 100644 --- a/tests/emitter.rs +++ b/tests/emitter.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; #[allow(clippy::similar_names)] #[test] @@ -16,7 +16,7 @@ a4: - 2 "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -25,7 +25,7 @@ a4: } println!("original:\n{s}"); println!("emitted:\n{writer}"); - let docs_new = match load_from_str(&writer) { + let docs_new = match Yaml::load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -55,14 +55,14 @@ products: {}: empty hash key "; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { let mut emitter = YamlEmitter::new(&mut writer); emitter.dump(doc).unwrap(); } - let docs_new = match load_from_str(&writer) { + let docs_new = match Yaml::load_from_str(&writer) { Ok(y) => y, Err(e) => panic!("{}", e), }; @@ -106,7 +106,7 @@ x: test y: avoid quoting here z: string with spaces"#; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -164,7 +164,7 @@ null0: ~ bool0: true bool1: false"#; - let docs = load_from_str(input).unwrap(); + let docs = Yaml::load_from_str(input).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -212,7 +212,7 
@@ e: h: []" }; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -234,7 +234,7 @@ a: - - e - f"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -258,7 +258,7 @@ a: - - f - - e"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { @@ -280,7 +280,7 @@ a: d: e: f"; - let docs = load_from_str(s).unwrap(); + let docs = Yaml::load_from_str(s).unwrap(); let doc = &docs[0]; let mut writer = String::new(); { diff --git a/tests/quickcheck.rs b/tests/quickcheck.rs index 7c916014..666739cc 100644 --- a/tests/quickcheck.rs +++ b/tests/quickcheck.rs @@ -3,7 +3,7 @@ extern crate quickcheck; use quickcheck::TestResult; -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; quickcheck! { fn test_check_weird_keys(xs: Vec) -> TestResult { @@ -13,7 +13,7 @@ quickcheck! 
{ let mut emitter = YamlEmitter::new(&mut out_str); emitter.dump(&input).unwrap(); } - match load_from_str(&out_str) { + match Yaml::load_from_str(&out_str) { Ok(output) => TestResult::from_bool(output.len() == 1 && input == output[0]), Err(err) => TestResult::error(err.to_string()), } diff --git a/tests/spec_test.rs b/tests/spec_test.rs index 52a0551e..1cf98e50 100644 --- a/tests/spec_test.rs +++ b/tests/spec_test.rs @@ -1,4 +1,4 @@ -use saphyr::{load_from_str, Hash, Yaml, YamlEmitter}; +use saphyr::{Hash, Yaml, YamlEmitter}; #[test] fn test_mapvec_legal() { @@ -53,5 +53,5 @@ fn test_mapvec_legal() { // - 6 // ``` - load_from_str(&out_str).unwrap(); + Yaml::load_from_str(&out_str).unwrap(); } diff --git a/tests/test_round_trip.rs b/tests/test_round_trip.rs index f1b28385..e4ada73b 100644 --- a/tests/test_round_trip.rs +++ b/tests/test_round_trip.rs @@ -1,10 +1,10 @@ -use saphyr::{load_from_str, Yaml, YamlEmitter}; +use saphyr::{Yaml, YamlEmitter}; fn roundtrip(original: &Yaml) { let mut emitted = String::new(); YamlEmitter::new(&mut emitted).dump(original).unwrap(); - let documents = load_from_str(&emitted).unwrap(); + let documents = Yaml::load_from_str(&emitted).unwrap(); println!("emitted {emitted}"); assert_eq!(documents.len(), 1); @@ -12,12 +12,12 @@ fn roundtrip(original: &Yaml) { } fn double_roundtrip(original: &str) { - let parsed = load_from_str(original).unwrap(); + let parsed = Yaml::load_from_str(original).unwrap(); let mut serialized = String::new(); YamlEmitter::new(&mut serialized).dump(&parsed[0]).unwrap(); - let reparsed = load_from_str(&serialized).unwrap(); + let reparsed = Yaml::load_from_str(&serialized).unwrap(); assert_eq!(parsed, reparsed); } @@ -55,12 +55,12 @@ fn test_numberlike_strings() { /// Example from #[test] fn test_issue133() { - let doc = load_from_str("\"0x123\"").unwrap().pop().unwrap(); + let doc = Yaml::load_from_str("\"0x123\"").unwrap().pop().unwrap(); assert_eq!(doc, Yaml::String("0x123".to_string())); let mut out_str = 
String::new(); YamlEmitter::new(&mut out_str).dump(&doc).unwrap(); - let doc2 = load_from_str(&out_str).unwrap().pop().unwrap(); + let doc2 = Yaml::load_from_str(&out_str).unwrap().pop().unwrap(); assert_eq!(doc, doc2); // This failed because the type has changed to a number now } From 7fd699859f676a13183b1917cd2072982426c1ab Mon Sep 17 00:00:00 2001 From: Ethiraric Date: Wed, 3 Jul 2024 00:51:37 +0200 Subject: [PATCH 6/6] Refactor to remove unnecessary unwrap. --- src/loader.rs | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/loader.rs b/src/loader.rs index 188512ac..5f534922 100644 --- a/src/loader.rs +++ b/src/loader.rs @@ -144,10 +144,7 @@ where if node.1 > 0 { self.anchor_map.insert(node.1, node.0.clone()); } - if self.doc_stack.is_empty() { - self.doc_stack.push(node); - } else { - let parent = self.doc_stack.last_mut().unwrap(); + if let Some(parent) = self.doc_stack.last_mut() { let parent_node = &mut parent.0; if parent_node.is_array() { parent_node.array_mut().push(node.0); @@ -162,6 +159,8 @@ where hash.insert(cur_key.take(), node.0); } } + } else { + self.doc_stack.push(node); } }