Skip to content

Commit 5da3ef8

Browse files
committed
Add a Bytes type for more efficient byte sequences
The `Bytes` type is heavily inspired by `serde_bytes` and ports it to the serde_as system. ```rust value: Vec<u8>, ``` Compared to `serde_bytes`, these improvements are available: 1. Integration with the `serde_as` annotation. /cc serde-rs/bytes#14 2. Implementation for arrays of arbitrary size (Rust 1.51+). /cc serde-rs/bytes#26
1 parent 5f68bb2 commit 5da3ef8

File tree

5 files changed

+376
-0
lines changed

5 files changed

+376
-0
lines changed

src/de/const_arrays.rs

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use super::*;
22
use crate::utils::{MapIter, SeqIter};
33
use serde::de::*;
44
use std::collections::{BTreeMap, HashMap};
5+
use std::convert::TryInto;
56
use std::fmt;
67
use std::mem::MaybeUninit;
78

@@ -146,3 +147,55 @@ macro_rules! tuple_seq_as_map_impl_intern {
146147
}
147148
tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap<KAs, VAs>);
148149
tuple_seq_as_map_impl_intern!([(K, V); N], HashMap<KAs, VAs>);
150+
151+
impl<'de, const N: usize> DeserializeAs<'de, [u8; N]> for Bytes {
152+
fn deserialize_as<D>(deserializer: D) -> Result<[u8; N], D::Error>
153+
where
154+
D: Deserializer<'de>,
155+
{
156+
struct ArrayVisitor<const M: usize>;
157+
158+
impl<'de, const M: usize> Visitor<'de> for ArrayVisitor<M> {
159+
type Value = [u8; M];
160+
161+
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
162+
formatter.write_fmt(format_args!("an byte array of size {}", M))
163+
}
164+
165+
fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
166+
where
167+
A: SeqAccess<'de>,
168+
{
169+
array_from_iterator(SeqIter::new(seq), &self)
170+
}
171+
172+
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
173+
where
174+
E: Error,
175+
{
176+
v.try_into()
177+
.map_err(|_| Error::invalid_length(v.len(), &self))
178+
}
179+
180+
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
181+
where
182+
E: Error,
183+
{
184+
v.as_bytes()
185+
.try_into()
186+
.map_err(|_| Error::invalid_length(v.len(), &self))
187+
}
188+
}
189+
190+
deserializer.deserialize_bytes(ArrayVisitor::<N>)
191+
}
192+
}
193+
194+
impl<'de, const N: usize> DeserializeAs<'de, Box<[u8; N]>> for Bytes {
195+
fn deserialize_as<D>(deserializer: D) -> Result<Box<[u8; N]>, D::Error>
196+
where
197+
D: Deserializer<'de>,
198+
{
199+
<Bytes as DeserializeAs<'de, [u8; N]>>::deserialize_as(deserializer).map(Box::new)
200+
}
201+
}

src/de/impls.rs

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use crate::rust::StringWithSeparator;
44
use crate::utils;
55
use crate::utils::duration::DurationSigned;
66
use serde::de::*;
7+
use std::borrow::Cow;
78
use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque};
89
use std::convert::From;
910
use std::fmt::{self, Display};
@@ -702,3 +703,166 @@ where
702703
Ok(Option::<U>::deserialize_as(deserializer)?.unwrap_or_default())
703704
}
704705
}
706+
707+
impl<'de> DeserializeAs<'de, &'de [u8]> for Bytes {
708+
fn deserialize_as<D>(deserializer: D) -> Result<&'de [u8], D::Error>
709+
where
710+
D: Deserializer<'de>,
711+
{
712+
<&'de [u8]>::deserialize(deserializer)
713+
}
714+
}
715+
716+
// serde_bytes implementation for ByteBuf
717+
// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/bytebuf.rs#L196
718+
//
719+
// Implements:
720+
// * visit_seq
721+
// * visit_bytes
722+
// * visit_byte_buf
723+
// * visit_str
724+
// * visit_string
725+
impl<'de> DeserializeAs<'de, Vec<u8>> for Bytes {
726+
fn deserialize_as<D>(deserializer: D) -> Result<Vec<u8>, D::Error>
727+
where
728+
D: Deserializer<'de>,
729+
{
730+
struct VecVisitor;
731+
732+
impl<'de> Visitor<'de> for VecVisitor {
733+
type Value = Vec<u8>;
734+
735+
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
736+
formatter.write_str("a byte array")
737+
}
738+
739+
fn visit_seq<A>(self, seq: A) -> Result<Self::Value, A::Error>
740+
where
741+
A: SeqAccess<'de>,
742+
{
743+
Ok(utils::SeqIter::new(seq).collect::<Result<_, _>>()?)
744+
}
745+
746+
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
747+
where
748+
E: Error,
749+
{
750+
Ok(v.to_vec())
751+
}
752+
753+
fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
754+
where
755+
E: Error,
756+
{
757+
Ok(v)
758+
}
759+
760+
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
761+
where
762+
E: Error,
763+
{
764+
Ok(v.as_bytes().to_vec())
765+
}
766+
767+
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
768+
where
769+
E: Error,
770+
{
771+
Ok(v.into_bytes())
772+
}
773+
}
774+
775+
deserializer.deserialize_byte_buf(VecVisitor)
776+
}
777+
}
778+
779+
impl<'de> DeserializeAs<'de, Box<[u8]>> for Bytes {
780+
fn deserialize_as<D>(deserializer: D) -> Result<Box<[u8]>, D::Error>
781+
where
782+
D: Deserializer<'de>,
783+
{
784+
<Bytes as DeserializeAs<'de, Vec<u8>>>::deserialize_as(deserializer)
785+
.map(|vec| vec.into_boxed_slice())
786+
}
787+
}
788+
789+
// serde_bytes implementation for Cow<'a, [u8]>
790+
// https://github.com/serde-rs/bytes/blob/cbae606b9dc225fc094b031cc84eac9493da2058/src/de.rs#L77
791+
//
792+
// Implements:
793+
// * visit_borrowed_bytes
794+
// * visit_borrowed_str
795+
// * visit_bytes
796+
// * visit_str
797+
// * visit_byte_buf
798+
// * visit_string
799+
// * visit_seq
800+
impl<'de> DeserializeAs<'de, Cow<'de, [u8]>> for Bytes {
801+
fn deserialize_as<D>(deserializer: D) -> Result<Cow<'de, [u8]>, D::Error>
802+
where
803+
D: Deserializer<'de>,
804+
{
805+
struct CowVisitor;
806+
807+
impl<'de> Visitor<'de> for CowVisitor {
808+
type Value = Cow<'de, [u8]>;
809+
810+
fn expecting(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
811+
formatter.write_str("a byte array")
812+
}
813+
814+
fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
815+
where
816+
E: Error,
817+
{
818+
Ok(Cow::Borrowed(v))
819+
}
820+
821+
fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
822+
where
823+
E: Error,
824+
{
825+
Ok(Cow::Borrowed(v.as_bytes()))
826+
}
827+
828+
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
829+
where
830+
E: Error,
831+
{
832+
Ok(Cow::Owned(v.to_vec()))
833+
}
834+
835+
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
836+
where
837+
E: Error,
838+
{
839+
Ok(Cow::Owned(v.as_bytes().to_vec()))
840+
}
841+
842+
fn visit_byte_buf<E>(self, v: Vec<u8>) -> Result<Self::Value, E>
843+
where
844+
E: Error,
845+
{
846+
Ok(Cow::Owned(v))
847+
}
848+
849+
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
850+
where
851+
E: Error,
852+
{
853+
Ok(Cow::Owned(v.into_bytes()))
854+
}
855+
856+
fn visit_seq<V>(self, seq: V) -> Result<Self::Value, V::Error>
857+
where
858+
V: SeqAccess<'de>,
859+
{
860+
Ok(Cow::Owned(
861+
utils::SeqIter::new(seq).collect::<Result<_, _>>()?,
862+
))
863+
}
864+
}
865+
866+
deserializer.deserialize_bytes(CowVisitor)
867+
}
868+
}

src/lib.rs

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1320,3 +1320,107 @@ pub struct TimestampNanoSecondsWithFrac<
13201320
FORMAT: formats::Format = f64,
13211321
STRICTNESS: formats::Strictness = formats::Strict,
13221322
>(PhantomData<(FORMAT, STRICTNESS)>);
1323+
1324+
/// Optimized handling of owned and borrowed byte representations.
///
/// Serialization of byte sequences like `&[u8]` or `Vec<u8>` is quite inefficient since each value will be serialized individually.
/// This converter type optimizes the serialization and deserialization.
///
/// This is a port of the `serde_bytes` crate making it compatible with the `serde_as`-annotation, which allows it to be used in more cases than provided by `serde_bytes`.
///
/// The type provides de-/serialization for these types:
///
/// * `[u8; N]`, Rust 1.51+, not possible using `serde_bytes`
/// * `&[u8]`
/// * `Box<[u8; N]>`, Rust 1.51+, not possible using `serde_bytes`
/// * `Box<[u8]>`
/// * `Vec<u8>`
/// * `Cow<'_, [u8]>`
///
/// # Examples
///
/// ```
/// # #[cfg(feature = "macros")] {
/// # use serde::{Deserialize, Serialize};
/// # use serde_with::{serde_as, Bytes};
/// # use std::borrow::Cow;
/// #
/// #[serde_as]
/// # #[derive(Debug, PartialEq)]
/// #[derive(Deserialize, Serialize)]
/// struct Test<'a> {
///     #[serde_as(as = "Bytes")]
///     array: [u8; 15],
///     #[serde_as(as = "Bytes")]
///     boxed: Box<[u8]>,
///     #[serde_as(as = "Bytes")]
///     #[serde(borrow)]
///     cow: Cow<'a, [u8]>,
///     #[serde_as(as = "Bytes")]
///     vec: Vec<u8>,
/// }
///
/// let value = Test {
///     array: *b"0123456789ABCDE",
///     boxed: b"...".to_vec().into_boxed_slice(),
///     cow: Cow::Borrowed(b"FooBar"),
///     vec: vec![0x41, 0x61, 0x21],
/// };
/// let expected = r#"(
///     array: "MDEyMzQ1Njc4OUFCQ0RF",
///     boxed: "Li4u",
///     cow: "Rm9vQmFy",
///     vec: "QWEh",
/// )"#;
///
/// assert_eq!(expected, ron::ser::to_string_pretty(&value, Default::default()).unwrap());
/// assert_eq!(value, ron::from_str(expected).unwrap());
/// # }
/// ```
///
/// ## Alternative to [`BytesOrString`]
///
/// The [`Bytes`] can replace [`BytesOrString`].
/// [`Bytes`] is implemented for more types, which makes it better.
/// The serialization behavior of [`Bytes`] differs from [`BytesOrString`], therefore only `deserialize_as` should be used.
///
/// ```rust
/// # #[cfg(feature = "macros")] {
/// # use serde::Deserialize;
/// # use serde_json::json;
/// # use serde_with::{serde_as, Bytes};
/// #
/// #[serde_as]
/// # #[derive(Debug, PartialEq)]
/// #[derive(Deserialize, serde::Serialize)]
/// struct Test {
///     #[serde_as(deserialize_as = "Bytes")]
///     from_bytes: Vec<u8>,
///     #[serde_as(deserialize_as = "Bytes")]
///     from_str: Vec<u8>,
/// }
///
/// // Different serialized values ...
/// let j = json!({
///     "from_bytes": [70,111,111,45,66,97,114],
///     "from_str": "Foo-Bar",
/// });
///
/// // can be deserialized ...
/// let test = Test {
///     from_bytes: b"Foo-Bar".to_vec(),
///     from_str: b"Foo-Bar".to_vec(),
/// };
/// assert_eq!(test, serde_json::from_value(j).unwrap());
///
/// // and serialization will always be a byte sequence
/// # assert_eq!(json!(
/// {
///     "from_bytes": [70,111,111,45,66,97,114],
///     "from_str": [70,111,111,45,66,97,114],
/// }
/// # ), serde_json::to_value(&test).unwrap());
/// # }
/// ```
#[derive(Copy, Clone, Debug, Default)]
pub struct Bytes;

src/ser/const_arrays.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,3 +42,21 @@ macro_rules! tuple_seq_as_map_impl_intern {
4242
}
4343
tuple_seq_as_map_impl_intern!([(K, V); N], BTreeMap<K, V>);
4444
tuple_seq_as_map_impl_intern!([(K, V); N], HashMap<K, V>);
45+
46+
impl<'a, const N: usize> SerializeAs<[u8; N]> for Bytes {
47+
fn serialize_as<S>(bytes: &[u8; N], serializer: S) -> Result<S::Ok, S::Error>
48+
where
49+
S: Serializer,
50+
{
51+
serializer.serialize_bytes(bytes)
52+
}
53+
}
54+
55+
impl<'a, const N: usize> SerializeAs<Box<[u8; N]>> for Bytes {
56+
fn serialize_as<S>(bytes: &Box<[u8; N]>, serializer: S) -> Result<S::Ok, S::Error>
57+
where
58+
S: Serializer,
59+
{
60+
serializer.serialize_bytes(&**bytes)
61+
}
62+
}

0 commit comments

Comments
 (0)