|
| 1 | +// This file is part of ICU4X. For terms of use, please see the file |
| 2 | +// called LICENSE at the top level of the ICU4X source tree |
| 3 | +// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ). |
| 4 | + |
| 5 | +use crate::ule::{EncodeAsVarULE, UleError, VarULE}; |
| 6 | +use alloc::boxed::Box; |
| 7 | +use core::fmt; |
| 8 | +use core::marker::PhantomData; |
| 9 | +use core::mem::ManuallyDrop; |
| 10 | +use core::ops::Deref; |
| 11 | +use core::ptr::NonNull; |
| 12 | +use zerofrom::ZeroFrom; |
| 13 | + |
/// A copy-on-write container that always holds a [`VarULE`] value in its flat byte
/// representation.
///
/// [`VarULE`] types exist so that complex variable-length data structures can be stored
/// inside variable-length collections such as [`crate::VarZeroVec`]. That works because
/// every [`VarULE`] value can be laid out as one flat run of bytes.
///
/// In zero-copy code it is sometimes desirable to use that byte representation
/// unconditionally, for example to reduce stack size: a struct with five `Cow<'a, str>`
/// fields is less stack-efficient than a single `Cow` holding the bytes of, say,
/// `Tuple5VarULE<str, str, str, str, str>`.
///
/// This type covers that case. It is logically a `Cow<'a, V>`, with some optimizations,
/// and is guaranteed to serialize as a byte stream in machine-readable formats.
///
/// In human-readable formats it defers to the serde impls on `V`, which ought to provide
/// a human-readable form.
pub struct VarZeroCow<'a, V: ?Sized> {
    /// Pointer to the bytes backing the value.
    ///
    /// # Safety Invariants
    ///
    /// 1. This slice must always be valid as a byte slice
    /// 2. This slice must represent a valid `V`
    /// 3. If `owned` is true, this slice can be freed.
    ///
    /// The slice may NOT have the lifetime of `'a`.
    buf: NonNull<[u8]>,
    /// When true, `buf` is a `Box<[u8]>`.
    owned: bool,
    /// Ties this type to both a borrowed `&'a V` and an owned `Box<V>` without storing
    /// either one.
    _phantom: PhantomData<(&'a V, Box<V>)>,
}
| 46 | + |
| 47 | +// This is mostly just a `Cow<[u8]>`, safe to implement Send and Sync on |
| 48 | +unsafe impl<'a, V: ?Sized> Send for VarZeroCow<'a, V> {} |
| 49 | +unsafe impl<'a, V: ?Sized> Sync for VarZeroCow<'a, V> {} |
| 50 | + |
| 51 | +impl<'a, V: ?Sized> Clone for VarZeroCow<'a, V> { |
| 52 | + fn clone(&self) -> Self { |
| 53 | + if self.is_owned() { |
| 54 | + // This clones the box |
| 55 | + let b: Box<[u8]> = self.as_bytes().into(); |
| 56 | + let b = ManuallyDrop::new(b); |
| 57 | + let buf: NonNull<[u8]> = (&**b).into(); |
| 58 | + Self { |
| 59 | + // Invariants upheld: |
| 60 | + // 1 & 2: The bytes came from `self` so they're a valid value and byte slice |
| 61 | + // 3: This is owned (we cloned it), so we set owned to true. |
| 62 | + buf, |
| 63 | + owned: true, |
| 64 | + _phantom: PhantomData, |
| 65 | + } |
| 66 | + } else { |
| 67 | + // Unfortunately we can't just use `new_borrowed(self.deref())` since the lifetime is shorter |
| 68 | + Self { |
| 69 | + // Invariants upheld: |
| 70 | + // 1 & 2: The bytes came from `self` so they're a valid value and byte slice |
| 71 | + // 3: This is borrowed (we're sharing a borrow), so we set owned to false. |
| 72 | + buf: self.buf, |
| 73 | + owned: false, |
| 74 | + _phantom: PhantomData, |
| 75 | + } |
| 76 | + } |
| 77 | + } |
| 78 | +} |
| 79 | + |
| 80 | +impl<'a, V: ?Sized> Drop for VarZeroCow<'a, V> { |
| 81 | + fn drop(&mut self) { |
| 82 | + if self.owned { |
| 83 | + unsafe { |
| 84 | + // Safety: (Invariant 3 on buf) |
| 85 | + // since owned is true, this is a valid Box<[u8]> and can be cleaned up |
| 86 | + let _ = Box::<[u8]>::from_raw(self.buf.as_ptr()); |
| 87 | + } |
| 88 | + } |
| 89 | + } |
| 90 | +} |
| 91 | + |
| 92 | +impl<'a, V: VarULE + ?Sized> VarZeroCow<'a, V> { |
| 93 | + /// Construct from a slice. Errors if the slice doesn't represent a valid `V` |
| 94 | + pub fn parse_byte_slice(bytes: &'a [u8]) -> Result<Self, UleError> { |
| 95 | + let val = V::parse_byte_slice(bytes)?; |
| 96 | + Ok(Self::new_borrowed(val)) |
| 97 | + } |
| 98 | + |
| 99 | + /// Construct from an owned slice. Errors if the slice doesn't represent a valid `V` |
| 100 | + pub fn parse_owned_byte_slice(bytes: Box<[u8]>) -> Result<Self, UleError> { |
| 101 | + V::validate_byte_slice(&bytes)?; |
| 102 | + let bytes = ManuallyDrop::new(bytes); |
| 103 | + let buf: NonNull<[u8]> = (&**bytes).into(); |
| 104 | + Ok(Self { |
| 105 | + // Invariants upheld: |
| 106 | + // 1 & 2: The bytes came from `val` so they're a valid value and byte slice |
| 107 | + // 3: This is owned, so we set owned to true. |
| 108 | + buf, |
| 109 | + owned: true, |
| 110 | + _phantom: PhantomData, |
| 111 | + }) |
| 112 | + } |
| 113 | + |
| 114 | + /// Construct from a slice that is known to represent a valid `V` |
| 115 | + /// |
| 116 | + /// # Safety |
| 117 | + /// |
| 118 | + /// `bytes` must be a valid `V`, i.e. it must successfully pass through |
| 119 | + /// `V::parse_byte_slice()` or `V::validate_byte_slice()`. |
| 120 | + pub const unsafe fn from_byte_slice_unchecked(bytes: &'a [u8]) -> Self { |
| 121 | + unsafe { |
| 122 | + // Safety: bytes is an &T which is always non-null |
| 123 | + let buf: NonNull<[u8]> = NonNull::new_unchecked(bytes as *const [u8] as *mut [u8]); |
| 124 | + Self { |
| 125 | + // Invariants upheld: |
| 126 | + // 1 & 2: Passed upstream to caller |
| 127 | + // 3: This is borrowed, so we set owned to false. |
| 128 | + buf, |
| 129 | + owned: false, |
| 130 | + _phantom: PhantomData, |
| 131 | + } |
| 132 | + } |
| 133 | + } |
| 134 | + |
| 135 | + /// Construct this from an [`EncodeAsVarULE`] version of the contained type |
| 136 | + /// |
| 137 | + /// Will always construct an owned version |
| 138 | + pub fn from_encodeable<E: EncodeAsVarULE<V>>(encodeable: &E) -> Self { |
| 139 | + let b = crate::ule::encode_varule_to_box(encodeable); |
| 140 | + Self::new_owned(b) |
| 141 | + } |
| 142 | + |
| 143 | + /// Construct a new borrowed version of this |
| 144 | + pub fn new_borrowed(val: &'a V) -> Self { |
| 145 | + unsafe { |
| 146 | + // Safety: val is a valid V, by type |
| 147 | + Self::from_byte_slice_unchecked(val.as_byte_slice()) |
| 148 | + } |
| 149 | + } |
| 150 | + |
| 151 | + /// Construct a new borrowed version of this |
| 152 | + pub fn new_owned(val: Box<V>) -> Self { |
| 153 | + let val = ManuallyDrop::new(val); |
| 154 | + let buf: NonNull<[u8]> = val.as_byte_slice().into(); |
| 155 | + Self { |
| 156 | + // Invariants upheld: |
| 157 | + // 1 & 2: The bytes came from `val` so they're a valid value and byte slice |
| 158 | + // 3: This is owned, so we set owned to true. |
| 159 | + buf, |
| 160 | + owned: true, |
| 161 | + _phantom: PhantomData, |
| 162 | + } |
| 163 | + } |
| 164 | +} |
| 165 | + |
| 166 | +impl<'a, V: ?Sized> VarZeroCow<'a, V> { |
| 167 | + /// Whether or not this is owned |
| 168 | + pub fn is_owned(&self) -> bool { |
| 169 | + self.owned |
| 170 | + } |
| 171 | + |
| 172 | + /// Get the byte representation of this type |
| 173 | + /// |
| 174 | + /// Is also always a valid `V` and can be passed to |
| 175 | + /// `V::from_byte_slice_unchecked()` |
| 176 | + pub fn as_bytes(&self) -> &[u8] { |
| 177 | + // Safety: Invariant 1 on self.buf |
| 178 | + // The valid V invariant comes from Invariant 2 |
| 179 | + unsafe { self.buf.as_ref() } |
| 180 | + } |
| 181 | +} |
| 182 | + |
| 183 | +impl<'a, V: VarULE + ?Sized> Deref for VarZeroCow<'a, V> { |
| 184 | + type Target = V; |
| 185 | + fn deref(&self) -> &V { |
| 186 | + // Safety: From invariant 2 on self.buf |
| 187 | + unsafe { V::from_byte_slice_unchecked(self.as_bytes()) } |
| 188 | + } |
| 189 | +} |
| 190 | + |
| 191 | +impl<'a, V: VarULE + ?Sized> From<&'a V> for VarZeroCow<'a, V> { |
| 192 | + fn from(other: &'a V) -> Self { |
| 193 | + Self::new_borrowed(other) |
| 194 | + } |
| 195 | +} |
| 196 | + |
| 197 | +impl<'a, V: VarULE + ?Sized> From<Box<V>> for VarZeroCow<'a, V> { |
| 198 | + fn from(other: Box<V>) -> Self { |
| 199 | + Self::new_owned(other) |
| 200 | + } |
| 201 | +} |
| 202 | + |
| 203 | +impl<'a, V: VarULE + ?Sized + fmt::Debug> fmt::Debug for VarZeroCow<'a, V> { |
| 204 | + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { |
| 205 | + self.deref().fmt(f) |
| 206 | + } |
| 207 | +} |
| 208 | + |
| 209 | +// We need manual impls since `#[derive()]` is disallowed on packed types |
| 210 | +impl<'a, V: VarULE + ?Sized + PartialEq> PartialEq for VarZeroCow<'a, V> { |
| 211 | + fn eq(&self, other: &Self) -> bool { |
| 212 | + self.deref().eq(other.deref()) |
| 213 | + } |
| 214 | +} |
| 215 | + |
| 216 | +impl<'a, V: VarULE + ?Sized + Eq> Eq for VarZeroCow<'a, V> {} |
| 217 | + |
| 218 | +impl<'a, V: VarULE + ?Sized + PartialOrd> PartialOrd for VarZeroCow<'a, V> { |
| 219 | + fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> { |
| 220 | + self.deref().partial_cmp(other.deref()) |
| 221 | + } |
| 222 | +} |
| 223 | + |
| 224 | +impl<'a, V: VarULE + ?Sized + Ord> Ord for VarZeroCow<'a, V> { |
| 225 | + fn cmp(&self, other: &Self) -> core::cmp::Ordering { |
| 226 | + self.deref().cmp(other.deref()) |
| 227 | + } |
| 228 | +} |
| 229 | + |
| 230 | +// # Safety |
| 231 | +// |
| 232 | +// encode_var_ule_len: Produces the length of the contained bytes, which are known to be a valid V by invariant |
| 233 | +// |
| 234 | +// encode_var_ule_write: Writes the contained bytes, which are known to be a valid V by invariant |
| 235 | +unsafe impl<'a, V: VarULE + ?Sized> EncodeAsVarULE<V> for VarZeroCow<'a, V> { |
| 236 | + fn encode_var_ule_as_slices<R>(&self, _: impl FnOnce(&[&[u8]]) -> R) -> R { |
| 237 | + // unnecessary if the other two are implemented |
| 238 | + unreachable!() |
| 239 | + } |
| 240 | + |
| 241 | + #[inline] |
| 242 | + fn encode_var_ule_len(&self) -> usize { |
| 243 | + self.as_bytes().len() |
| 244 | + } |
| 245 | + |
| 246 | + #[inline] |
| 247 | + fn encode_var_ule_write(&self, dst: &mut [u8]) { |
| 248 | + dst.copy_from_slice(self.as_bytes()) |
| 249 | + } |
| 250 | +} |
| 251 | + |
#[cfg(feature = "serde")]
impl<'a, V: VarULE + ?Sized + serde::Serialize> serde::Serialize for VarZeroCow<'a, V> {
    /// Serializes as raw bytes for machine-readable formats, and through `V`'s own
    /// `Serialize` impl for human-readable ones.
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(self.as_bytes());
        }
        let value: &V = self.deref();
        <V as serde::Serialize>::serialize(value, serializer)
    }
}
| 265 | + |
#[cfg(feature = "serde")]
impl<'a, 'de: 'a, V: VarULE + ?Sized> serde::Deserialize<'de> for VarZeroCow<'a, V>
where
    Box<V>: serde::Deserialize<'de>,
{
    /// Deserializes an owned value through `Box<V>` for human-readable formats, and a
    /// borrowed, validated byte slice for machine-readable ones.
    fn deserialize<Des>(deserializer: Des) -> Result<Self, Des::Error>
    where
        Des: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Machine-readable: borrow the bytes from the input and validate them as a `V`
            let bytes = <&[u8]>::deserialize(deserializer)?;
            return Self::parse_byte_slice(bytes).map_err(serde::de::Error::custom);
        }
        // Human-readable: let `Box<V>` do the work, then take ownership of the result
        let boxed = Box::<V>::deserialize(deserializer)?;
        Ok(Self::new_owned(boxed))
    }
}
| 284 | + |
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::Bake for VarZeroCow<'a, V> {
    /// Emits an expression that rebuilds this value from its baked byte slice.
    fn bake(&self, env: &databake::CrateEnv) -> databake::TokenStream {
        // Record that the generated code refers to the `zerovec` crate
        env.insert("zerovec");
        let payload = self.as_bytes().bake(env);
        databake::quote! {
            // Safety: Known to come from a valid V since self.as_bytes() is always a valid V
            unsafe {
                zerovec::VarZeroCow::from_byte_slice_unchecked(#payload)
            }
        }
    }
}
| 298 | + |
#[cfg(feature = "databake")]
impl<'a, V: VarULE + ?Sized> databake::BakeSize for VarZeroCow<'a, V> {
    /// Reports the length of the stored byte slice.
    fn borrows_size(&self) -> usize {
        let bytes = self.as_bytes();
        bytes.len()
    }
}
| 305 | + |
| 306 | +impl<'a, V: VarULE + ?Sized> ZeroFrom<'a, V> for VarZeroCow<'a, V> { |
| 307 | + #[inline] |
| 308 | + fn zero_from(other: &'a V) -> Self { |
| 309 | + Self::new_borrowed(other) |
| 310 | + } |
| 311 | +} |
| 312 | + |
| 313 | +impl<'a, 'b, V: VarULE + ?Sized> ZeroFrom<'a, VarZeroCow<'b, V>> for VarZeroCow<'a, V> { |
| 314 | + #[inline] |
| 315 | + fn zero_from(other: &'a VarZeroCow<'b, V>) -> Self { |
| 316 | + Self::new_borrowed(other) |
| 317 | + } |
| 318 | +} |
| 319 | + |
#[cfg(test)]
mod tests {
    use super::VarZeroCow;
    use crate::ule::tuplevar::Tuple3VarULE;
    use crate::vecs::VarZeroSlice;

    /// Encodes a three-field tuple into a `VarZeroCow`, checks each field reads back, and
    /// (with serde enabled) round-trips it through bincode and JSON.
    #[test]
    fn test_cow_roundtrip() {
        type Messy = Tuple3VarULE<str, [u8], VarZeroSlice<str>>;

        let raw: &[u8] = b"g\xFF\xFFdbye";
        let words = vec!["one", "two", "three"];
        let messy: VarZeroCow<Messy> = VarZeroCow::from_encodeable(&("hello", raw, words));

        assert_eq!(messy.a(), "hello");
        assert_eq!(messy.b(), b"g\xFF\xFFdbye");
        assert_eq!(&messy.c()[1], "two");

        #[cfg(feature = "serde")]
        {
            // Machine-readable path: bytes in, borrowed value out
            let encoded = bincode::serialize(&messy).unwrap();
            let from_bincode: VarZeroCow<Messy> = bincode::deserialize(&encoded).unwrap();
            assert_eq!(
                messy, from_bincode,
                "Single element roundtrips with bincode"
            );
            assert!(!from_bincode.is_owned());

            // Human-readable path: goes through the serde impls of the contained type
            let text = serde_json::to_string(&messy).unwrap();
            let from_json: VarZeroCow<Messy> = serde_json::from_str(&text).unwrap();
            assert_eq!(messy, from_json, "Single element roundtrips with serde");
        }
    }
}
0 commit comments