Skip to content

Commit 60af274

Browse files
authored
Merge pull request #1354 from eclipse-zenoh/dev/bytes_raw
Add ZBytes::slices
2 parents 4bcf093 + 7b5f9ae commit 60af274

File tree

2 files changed

+165
-9
lines changed

2 files changed

+165
-9
lines changed

examples/examples/z_bytes.rs

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ fn main() {
3333
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
3434
// let encoding = Encoding::ZENOH_STRING;
3535

36-
// Cow
36+
// Cow<str>
37+
// See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
3738
let input = Cow::from("test");
3839
let payload = ZBytes::from(&input);
3940
let output: Cow<str> = payload.deserialize().unwrap();
@@ -49,6 +50,15 @@ fn main() {
4950
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
5051
// let encoding = Encoding::ZENOH_BYTES;
5152

53+
// Cow<[u8]>
54+
// See [`zenoh::bytes::ZBytes`] documentation for zero-copy behaviour.
55+
let input = Cow::from(vec![1, 2, 3, 4]);
56+
let payload = ZBytes::from(&input);
57+
let output: Cow<[u8]> = payload.into();
58+
assert_eq!(input, output);
59+
// Corresponding encoding to be used in operations like `.put()`, `.reply()`, etc.
60+
// let encoding = Encoding::ZENOH_BYTES;
61+
5262
// Writer & Reader
5363
// serialization
5464
let mut bytes = ZBytes::empty();
@@ -81,6 +91,13 @@ fn main() {
8191
assert_eq!(input[idx], value.unwrap());
8292
}
8393

94+
// Iterator RAW
95+
let input: [i32; 4] = [1, 2, 3, 4];
96+
let payload = ZBytes::from_iter(input.iter());
97+
for slice in payload.slices() {
98+
println!("{:02x?}", slice);
99+
}
100+
84101
// HashMap
85102
let mut input: HashMap<usize, String> = HashMap::new();
86103
input.insert(0, String::from("abc"));

zenoh/src/api/bytes.rs

Lines changed: 147 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,7 @@ pub trait Deserialize<T> {
106106
///
107107
/// `ZBytes` provides convenient methods to the user for serialization/deserialization based on the default Zenoh serializer [`ZSerde`].
108108
///
109-
/// **NOTE:** Zenoh semantic and protocol take care of sending and receiving bytes without restricting the actual data types.
109+
/// **NOTE 1:** Zenoh semantics and protocol take care of sending and receiving bytes without restricting the actual data types.
110110
/// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitive data types via
111111
/// a simple out-of-the-box encoding. [`ZSerde`] is **NOT** by any means the only serializer/deserializer users can use nor a limitation
112112
/// to the types supported by Zenoh. Users are free and encouraged to use any serializer/deserializer of their choice like *serde*,
@@ -185,6 +185,40 @@ pub trait Deserialize<T> {
185185
/// assert_eq!(start, end);
186186
/// ```
187187
///
188+
/// **NOTE 2:** `ZBytes` may store data in non-contiguous regions of memory.
189+
/// The typical case for `ZBytes` to store data in different memory regions is when data is received fragmented from the network.
190+
/// The user can then decide to use [`ZBytes::deserialize`], [`ZBytes::reader`], [`ZBytes::into`], or [`ZBytes::slices`] depending
191+
/// on their needs.
192+
///
193+
/// To directly access raw data as contiguous slice it is preferred to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`].
194+
/// If `ZBytes` contains all the data in a single memory location, this is guaranteed to be zero-copy. This is the common case for small messages.
195+
/// If `ZBytes` contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
196+
///
197+
/// Example:
198+
/// ```rust
199+
/// use std::borrow::Cow;
200+
/// use zenoh::bytes::ZBytes;
201+
///
202+
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
203+
/// let bytes = ZBytes::from(buf.clone());
204+
/// let deser: Cow<[u8]> = bytes.into();
205+
/// assert_eq!(buf.as_slice(), deser.as_ref());
206+
/// ```
207+
///
208+
/// It is also possible to iterate over the raw data that may be scattered across different memory regions.
209+
/// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`] nor on how many slices a given [`ZBytes`] will be composed of.
210+
/// The only provided guarantee is on the bytes order that is preserved.
211+
///
212+
/// Example:
213+
/// ```rust
214+
/// use zenoh::bytes::ZBytes;
215+
///
216+
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
217+
/// let bytes = ZBytes::from(buf.clone());
218+
/// for slice in bytes.slices() {
219+
/// println!("{:02x?}", slice);
220+
/// }
221+
/// ```
188222
#[repr(transparent)]
189223
#[derive(Clone, Debug, Default, PartialEq, Eq)]
190224
pub struct ZBytes(ZBuf);
@@ -208,7 +242,7 @@ impl ZBytes {
208242
self.0.is_empty()
209243
}
210244

211-
/// Returns the length of the ZBytes.
245+
/// Returns the total number of bytes in the ZBytes.
212246
pub fn len(&self) -> usize {
213247
self.0.len()
214248
}
@@ -229,11 +263,25 @@ impl ZBytes {
229263
}
230264

231265
/// Get a [`ZBytesWriter`] implementing [`std::io::Write`] trait.
266+
///
267+
/// See [`ZBytesWriter`] on how to chain the serialization of different types into a single [`ZBytes`].
232268
pub fn writer(&mut self) -> ZBytesWriter<'_> {
233269
ZBytesWriter(self.0.writer())
234270
}
235271

236-
/// Get a [`ZBytesReader`] implementing [`std::io::Read`] trait.
272+
/// Get a [`ZBytesIterator`] that deserializes a sequence of `T`.
273+
///
274+
/// Example:
275+
/// ```rust
276+
/// use zenoh::bytes::ZBytes;
277+
///
278+
/// let list: Vec<f32> = vec![1.1, 2.2, 3.3];
279+
/// let mut zbs = ZBytes::from_iter(list.iter());
280+
///
281+
/// for (index, elem) in zbs.iter::<f32>().enumerate() {
282+
/// assert_eq!(list[index], elem.unwrap());
283+
/// }
284+
/// ```
237285
pub fn iter<T>(&self) -> ZBytesIterator<'_, T>
238286
where
239287
for<'b> ZSerde: Deserialize<T, Input<'b> = &'b ZBytes>,
@@ -245,6 +293,62 @@ impl ZBytes {
245293
}
246294
}
247295

296+
/// Return an iterator on raw bytes slices contained in the [`ZBytes`].
297+
///
298+
/// [`ZBytes`] may store data in non-contiguous regions of memory; this iterator
299+
/// allows accessing the raw data directly, without any attempt at deserializing it.
300+
/// Please note that no guarantee is provided on the internal memory layout of [`ZBytes`].
301+
/// The only provided guarantee is on the bytes order that is preserved.
302+
///
303+
/// Please note that [`ZBytes::iter`] performs deserialization while iterating, whereas [`ZBytes::slices`] does not.
304+
///
305+
/// ```rust
306+
/// use std::io::Write;
307+
/// use zenoh::bytes::ZBytes;
308+
///
309+
/// let buf1: Vec<u8> = vec![1, 2, 3];
310+
/// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
311+
/// let mut zbs = ZBytes::empty();
312+
/// let mut writer = zbs.writer();
313+
/// writer.write_all(&buf1).unwrap();
314+
/// writer.write_all(&buf2).unwrap();
315+
///
316+
/// // Access the raw content
317+
/// for slice in zbs.slices() {
318+
/// println!("{:02x?}", slice);
319+
/// }
320+
///
321+
/// // Concatenate input in a single vector
322+
/// let buf: Vec<u8> = buf1.into_iter().chain(buf2.into_iter()).collect();
323+
/// // Concatenate raw bytes in a single vector
324+
/// let out: Vec<u8> = zbs.slices().fold(Vec::new(), |mut b, x| { b.extend_from_slice(x); b });
325+
/// // The previous line is the equivalent of
326+
/// // let out: Vec<u8> = zbs.into();
327+
/// assert_eq!(buf, out);
328+
/// ```
329+
///
330+
/// The example below shows how [`ZBytesWriter::append`] simply appends the slices of one [`ZBytes`]
331+
/// to another and how those slices can be iterated over to access the raw data.
332+
/// ```rust
333+
/// use std::io::Write;
334+
/// use zenoh::bytes::ZBytes;
335+
///
336+
/// let buf1: Vec<u8> = vec![1, 2, 3];
337+
/// let buf2: Vec<u8> = vec![4, 5, 6, 7, 8];
338+
///
339+
/// let mut zbs = ZBytes::empty();
340+
/// let mut writer = zbs.writer();
341+
/// writer.append(ZBytes::from(buf1.clone()));
342+
/// writer.append(ZBytes::from(buf2.clone()));
343+
///
344+
/// let mut iter = zbs.slices();
345+
/// assert_eq!(buf1.as_slice(), iter.next().unwrap());
346+
/// assert_eq!(buf2.as_slice(), iter.next().unwrap());
347+
/// ```
348+
pub fn slices(&self) -> impl Iterator<Item = &[u8]> {
349+
self.0.slices()
350+
}
351+
248352
/// Serialize an object of type `T` as a [`ZBytes`] using the [`ZSerde`].
249353
///
250354
/// ```rust
@@ -293,7 +397,11 @@ impl ZBytes {
293397
ZSerde.serialize(t)
294398
}
295399

296-
/// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
400+
/// Deserialize an object of type `T` using [`ZSerde`].
401+
///
402+
/// See [`ZBytes::serialize`] and [`ZBytes::try_serialize`] for the examples.
403+
///
404+
/// See [`ZBytes::into`] for infallible conversion, e.g. to get raw bytes.
297405
pub fn deserialize<'a, T>(&'a self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
298406
where
299407
ZSerde: Deserialize<T, Input<'a> = &'a ZBytes>,
@@ -302,7 +410,7 @@ impl ZBytes {
302410
ZSerde.deserialize(self)
303411
}
304412

305-
/// Deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
413+
/// Deserialize an object of type `T` using [`ZSerde`].
306414
pub fn deserialize_mut<'a, T>(&'a mut self) -> Result<T, <ZSerde as Deserialize<T>>::Error>
307415
where
308416
ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes>,
@@ -311,7 +419,37 @@ impl ZBytes {
311419
ZSerde.deserialize(self)
312420
}
313421

314-
/// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
422+
/// Infallibly deserialize an object of type `T` using [`ZSerde`].
423+
///
424+
/// To directly access raw data as contiguous slice it is preferred to convert `ZBytes` into a [`std::borrow::Cow<[u8]>`](`std::borrow::Cow`).
425+
/// If [`ZBytes`] contains all the data in a single memory location, then it is guaranteed to be zero-copy. This is the common case for small messages.
426+
/// If [`ZBytes`] contains data scattered in different memory regions, this operation will do an allocation and a copy. This is the common case for large messages.
427+
///
428+
/// ```rust
429+
/// use std::borrow::Cow;
430+
/// use zenoh::bytes::ZBytes;
431+
///
432+
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
433+
/// let bytes = ZBytes::from(buf.clone());
434+
/// let deser: Cow<[u8]> = bytes.into();
435+
/// assert_eq!(buf.as_slice(), deser.as_ref());
436+
/// ```
437+
///
438+
/// An alternative is to convert `ZBytes` into a [`std::vec::Vec<u8>`].
439+
/// Converting to [`std::vec::Vec<u8>`] will always allocate and make a copy.
440+
///
441+
/// ```rust
442+
/// use std::borrow::Cow;
443+
/// use zenoh::bytes::ZBytes;
444+
///
445+
/// let buf: Vec<u8> = vec![0, 1, 2, 3];
446+
/// let bytes = ZBytes::from(buf.clone());
447+
/// let deser: Vec<u8> = bytes.into();
448+
/// assert_eq!(buf.as_slice(), deser.as_slice());
449+
/// ```
450+
///
451+
/// If you want to be sure that no copy is performed at all, then you should use [`ZBytes::slices`].
452+
/// Please note that in this case data may not be contiguous in memory and it is the responsibility of the user to properly parse the raw slices.
315453
pub fn into<'a, T>(&'a self) -> T
316454
where
317455
ZSerde: Deserialize<T, Input<'a> = &'a ZBytes, Error = Infallible>,
@@ -320,7 +458,7 @@ impl ZBytes {
320458
ZSerde.deserialize(self).unwrap_infallible()
321459
}
322460

323-
/// Infallibly deserialize an object of type `T` from a [`Value`] using the [`ZSerde`].
461+
/// Infallibly deserialize an object of type `T` using [`ZSerde`].
324462
pub fn into_mut<'a, T>(&'a mut self) -> T
325463
where
326464
ZSerde: Deserialize<T, Input<'a> = &'a mut ZBytes, Error = Infallible>,
@@ -553,7 +691,7 @@ where
553691
}
554692

555693
/// The default serializer for [`ZBytes`]. It supports primitive types, such as: `Vec<u8>`, `uX`, `iX`, `fX`, `String`, `bool`.
556-
/// It also supports common Rust serde values like `serde_json::Value`.
694+
/// It also supports common Rust serde values like [`serde_json::Value`].
557695
///
558696
/// **NOTE:** Zenoh semantics and protocol take care of sending and receiving bytes without restricting the actual data types.
559697
/// [`ZSerde`] is the default serializer/deserializer provided for convenience to the users to deal with primitive data types via
@@ -1164,6 +1302,7 @@ impl From<&mut Cow<'_, str>> for ZBytes {
11641302
}
11651303
}
11661304

1305+
/// See [`Deserialize<Cow<'a, [u8]>>`] for guarantees on copies.
11671306
impl<'a> Deserialize<Cow<'a, str>> for ZSerde {
11681307
type Input<'b> = &'a ZBytes;
11691308
type Error = Utf8Error;

0 commit comments

Comments
 (0)