@@ -3,7 +3,6 @@ use crate::{IonResult, UInt};
3
3
use bumpalo:: collections:: Vec as BumpVec ;
4
4
use ice_code:: ice as cold_path;
5
5
use std:: io:: Write ;
6
- use std:: mem;
7
6
8
7
const BITS_PER_U128 : usize = 128 ;
9
8
const BITS_PER_ENCODED_BYTE : usize = 7 ;
@@ -59,83 +58,36 @@ impl FlexUInt {
59
58
// By branching on particular values, we make the value of `num_encoded_bytes` in their
60
59
// corresponding arm `const`. This allows us to use `read_n_bytes` to optimize for those
61
60
// sizes.
62
- let flex_uint = match num_encoded_bytes {
63
- 1 => Self :: read_n_bytes :: < 1 > ( input) ,
64
- 2 => Self :: read_n_bytes :: < 2 > ( input) ,
65
- 3 => Self :: read_n_bytes :: < 3 > ( input) ,
66
- 4 => Self :: read_n_bytes :: < 4 > ( input) ,
61
+ let mut buffer = [ 0u8 ; size_of :: < u64 > ( ) ] ;
62
+ match num_encoded_bytes {
63
+ 1 => Self :: read_n_bytes :: < 1 > ( input, & mut buffer) ,
64
+ 2 => Self :: read_n_bytes :: < 2 > ( input, & mut buffer) ,
65
+ 3 => Self :: read_n_bytes :: < 3 > ( input, & mut buffer) ,
66
+ 4 => Self :: read_n_bytes :: < 4 > ( input, & mut buffer) ,
67
+ // If the number of encoded bytes isn't 1-4, fall back to the general-purpose
68
+ // reading logic.
67
69
_ => break ' common_case,
68
70
} ;
71
+ let value = u64:: from_le_bytes ( buffer) . wrapping_shr ( num_encoded_bytes as u32 ) ;
72
+ let flex_uint = FlexUInt :: new ( num_encoded_bytes, value) ;
69
73
return Ok ( flex_uint) ;
70
74
}
71
75
}
72
- // Calling `read_flex_primitive_as_uint_no_inline` keeps this method small enough that
73
- // the code for the common case can be inlined .
74
- Self :: read_flex_primitive_as_uint_no_inline ( input, offset, "reading a FlexUInt" , false )
76
+ // General-purpose FlexUInt reading logic. Checks for empty input and supports FlexUInts
77
+ // up to U64::MAX .
78
+ Self :: read_flex_primitive_as_uint ( input, offset, "reading a FlexUInt" )
75
79
}
76
80
77
81
#[ inline]
78
- pub fn read_n_bytes < const NUM_BYTES : usize > ( bytes : & [ u8 ] ) -> FlexUInt {
82
+ pub fn read_n_bytes < const NUM_BYTES : usize > ( bytes : & [ u8 ] , buffer : & mut [ u8 ; size_of :: < u64 > ( ) ] ) {
79
83
let input: [ u8 ; NUM_BYTES ] = * ( bytes. first_chunk :: < NUM_BYTES > ( ) . unwrap ( ) ) ;
80
- let mut buffer = [ 0u8 ; size_of :: < u64 > ( ) ] ;
81
84
* buffer. first_chunk_mut :: < NUM_BYTES > ( ) . unwrap ( ) = input;
82
- let value = u64:: from_le_bytes ( buffer)
83
- . checked_shr ( NUM_BYTES as u32 )
84
- . unwrap_or ( 0 ) ;
85
- FlexUInt :: new ( NUM_BYTES , value)
86
85
}
87
86
88
- /// Helper method that reads a [`FlexUInt`] with 7 or fewer bytes of magnitude from the buffer.
89
- // Caller must confirm that `bytes` has at least 8 bytes.
90
- #[ inline( never) ]
91
- fn read_small_flex_uint ( bytes : & [ u8 ] ) -> FlexUInt {
92
- debug_assert ! ( bytes. len( ) >= 8 ) ;
93
- let num_encoded_bytes = bytes[ 0 ] . trailing_zeros ( ) as usize + 1 ;
94
- let num_encoded_bits = 8 * num_encoded_bytes;
95
- // Get a mask with the low 'n' bits set
96
- // TODO: Should this be a const cache of num_encoded_bits -> mask?
97
- let mask = 1u64
98
- . checked_shl ( num_encoded_bits as u32 )
99
- . map ( |v| v - 1 )
100
- . unwrap_or ( u64:: MAX ) ;
101
- // Convert our longer-than-8-bytes slice to a fixed sized 8-byte array that we can convert
102
- // to a u64 directly.
103
- let fixed_size_input: [ u8 ; 8 ] = bytes[ ..8 ] . try_into ( ) . unwrap ( ) ;
104
- // This step will often read unrelated bytes from beyond the FlexUInt, but they are
105
- // discarded in the shift operation that follows.
106
- let encoded_value = u64:: from_le_bytes ( fixed_size_input) ;
107
- // Note that `num_encoded_bytes` is also the number of continuation flags that we need
108
- // to discard via right shifting.
109
- let value = ( encoded_value & mask) >> num_encoded_bytes;
110
- FlexUInt :: new ( num_encoded_bytes, value)
111
- }
112
-
113
- #[ inline( never) ]
114
- pub ( crate ) fn read_flex_primitive_as_uint_no_inline (
115
- input : & [ u8 ] ,
116
- offset : usize ,
117
- label : & ' static str ,
118
- support_sign_extension : bool ,
119
- ) -> IonResult < FlexUInt > {
120
- Self :: read_flex_primitive_as_uint ( input, offset, label, support_sign_extension)
121
- }
122
-
123
- /// Helper method that reads a flex-encoded primitive from the buffer, returning it as a `FlexUInt`.
124
- /// If an error occurs while reading, its description will include the supplied `label`.
125
- ///
126
- /// The current implementation supports flex primitives with up to 64 bits of representation
127
- /// beyond the leading header bits. Flex primitives requiring 10 bytes to encode have 70 magnitude
128
- /// bits. If this value is unsigned (`support_sign_extension=false`), the six bits beyond the
129
- /// supported 64 must all be `0`. If this value will later be re-interpreted as a signed value,
130
- /// (`support_sign_extension=true`), then the six bits beyond the supported 64 must all be the
131
- /// same as the 64th (highest supported) bit. This will allow encodings of up to 70 bits
132
- /// to be correctly interpreted as positive, negative, or beyond the bounds of the 64 bit
133
- /// limitation.
134
87
pub ( crate ) fn read_flex_primitive_as_uint (
135
88
input : & [ u8 ] ,
136
89
offset : usize ,
137
90
label : & ' static str ,
138
- support_sign_extension : bool ,
139
91
) -> IonResult < FlexUInt > {
140
92
// A closure that generates an incomplete data result at the current offset. This can be invoked
141
93
// in a variety of early-return cases in this method.
@@ -146,147 +98,30 @@ impl FlexUInt {
146
98
return incomplete ( ) ;
147
99
}
148
100
149
- // The `from_le_bytes` method we use to interpret data requires at least 8 bytes to be available.
150
- // There can be 1-2 bytes of header for a u64, leading to a maximum size of 10 bytes. If the input
151
- // buffer doesn't have at least 10 bytes, copy its contents into a temporary buffer that's
152
- // padded with 0 bytes. We round the size of the temp buffer to 16 as it produces slightly
153
- // nicer assembly than 10.
154
- let mut buffer = [ 0u8 ; 16 ] ;
155
- let bytes = if bytes_available >= 10 {
156
- input
157
- } else {
158
- buffer[ 0 ..bytes_available] . copy_from_slice ( input) ;
159
- & buffer[ ..]
101
+ let num_encoded_bytes = match input[ 0 ] {
102
+ // If the first byte is zero, we're not done reading the length bits yet.
103
+ // Confirm that we have more than just one byte remaining in input.
104
+ 0 if input. len ( ) == 1 => return incomplete ( ) ,
105
+ // The number of trailing zeros in the second byte plus the 8 trailing
106
+ // zeros from the first byte.
107
+ 0 => ( input[ 1 ] . trailing_zeros ( ) as usize + 1 ) + 8 ,
108
+ // Otherwise, use the number of trailing zeros from the first byte.
109
+ first_byte => first_byte. trailing_zeros ( ) as usize + 1 ,
160
110
} ;
161
111
162
- let first_byte = bytes[ 0 ] ;
163
- // If the first byte is not zero, the FlexUInt is 7 or fewer bytes.
164
- if first_byte != 0 {
165
- let num_encoded_bytes = first_byte. trailing_zeros ( ) as usize + 1 ;
166
- // Note that `bytes_available` is the number of bytes in the original unpadded input.
167
- // Our buffer may be 16 bytes long but only `bytes_available` of those are meaningful.
168
- if bytes_available < num_encoded_bytes {
169
- return incomplete ( ) ;
170
- }
171
- // At this point, we know the original input contained all of the FlexUInt's bytes.
172
- // We can call `read_small_flex_uint` with the now-padded version of the buffer.
173
- // It will discard any bytes that are not part of the FlexUInt.
174
- let flex_uint = Self :: read_small_flex_uint ( bytes) ;
175
- return Ok ( flex_uint) ;
112
+ if num_encoded_bytes > 10 {
113
+ return IonResult :: decoding_error (
114
+ "maximum supported serialized FlexUInt size is 10 bytes" ,
115
+ ) ;
176
116
}
177
-
178
- cold_path ! { {
179
- // If we reach this point, the first byte was a zero. The FlexUInt is at least 9 bytes in size.
180
- // We need to inspect the second byte to see how many more prefix bits there are.
181
- if bytes_available < 2 {
182
- return incomplete( ) ;
183
- }
184
- let second_byte = bytes[ 1 ] ;
185
-
186
- if second_byte & 0b11 == 0b00 {
187
- // The flag bits in the second byte indicate at least two more bytes, meaning the total
188
- // length is more than 10 bytes. We're not equipped to handle this.
189
- return IonResult :: decoding_error(
190
- "found a >10 byte Flex(U)Int too large to fit in 64 bits" ,
191
- ) ;
192
- }
193
-
194
- if second_byte & 0b11 == 0b10 {
195
- // The lowest bit of the second byte is empty, the next lowest is not. The encoding
196
- // is 10 bytes.
197
-
198
- if bytes_available < 10 {
199
- return incomplete( ) ;
200
- }
201
-
202
- let flex_uint = Self :: read_10_byte_flex_primitive_as_uint(
203
- support_sign_extension,
204
- bytes,
205
- second_byte,
206
- ) ?;
207
- return Ok ( flex_uint) ;
208
- }
209
-
210
- // The lowest bit of the second byte is set. The encoding is 9 bytes.
211
- if bytes_available < 9 {
212
- return incomplete( ) ;
213
- }
214
- // There are 57-63 bits of magnitude. We can decode the remaining bytes in a u64.
215
- let remaining_data = & bytes[ 1 ..9 ] ;
216
- // We know that the slice is 8 bytes long, so we can unwrap() the conversion to [u8; 8]
217
- // Lop off the lowest bit to discard the `end` flag.
218
- let value = u64 :: from_le_bytes( remaining_data[ ..8 ] . try_into( ) . unwrap( ) ) >> 1 ;
219
- let flex_uint = FlexUInt :: new( 9 , value) ;
220
- Ok ( flex_uint)
221
- } }
222
- }
223
-
224
- /// Helper method to handle flex primitives whose encoding requires 10 bytes. This case is
225
- /// complex because it requires evaluating data beyond the supported 64 bits of representation
226
- /// to detect overflow and support signed re-interpretation.
227
- fn read_10_byte_flex_primitive_as_uint (
228
- support_sign_extension : bool ,
229
- input : & [ u8 ] ,
230
- second_byte : u8 ,
231
- ) -> IonResult < FlexUInt > {
232
- // There are 10 prefix (continuation) bits, 64 bits of magnitude, and 6 bits of sign
233
- // extension (if enabled). We cannot store the highest 6 bits, so this method just checks
234
- // to make sure that they do not modify the meaning of the value in the lower 64 bits.
235
- // For signed values, this means the 6 extra bits must all be the same as the 64th bit.
236
- // For unsigned values, this means that the 6 extra bits must all be `0`.
237
- //
238
- // Little Endian byte diagram:
239
- //
240
- // b0 b1 b2 b3
241
- // PPPPPPPP MMMMMMPP MMMMMMMM MMMMMMMM
242
- // b4 b5 b6 b7
243
- // MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
244
- // b8 b9
245
- // MMMMMMMM XXXXXXMM
246
- //
247
- // P = Prefix bit
248
- // M = Magnitude bit
249
- // X = An 'extra' bit; if `support_sign_extension` is true, these are sign bits.
250
-
251
- // We've already processed the first byte, and we've looked at the lowest two bits of
252
- // the second byte. Isolate the highest six bits of the second byte (b1) which represent
253
- // the lowest six bits of the magnitude.
254
- let magnitude_low_six = second_byte >> 2 ;
255
- // Load the remaining 8 bytes into a u64 that we can easily shift/mask.
256
- let remaining_data = & input[ 2 ..10 ] ;
257
- // We know the slice is 8 bytes long, so we can `unwrap()` the conversion to [u8; 8]
258
- let remaining_magnitude = u64:: from_le_bytes ( remaining_data. try_into ( ) . unwrap ( ) ) ;
259
-
260
- let sign_extension_bits = ( remaining_magnitude & ( 0b111111 << 58 ) ) >> 58 ;
261
- if support_sign_extension {
262
- // Something downstream intends to use this as a signed value; we need to make sure
263
- // that bits 65-70 match bit 64. `remaining_magnitude` is storing 58 bits of data,
264
- // so bit 64 of the value (bit index=63) is bit 58 (bit index=57) in `remaining_magnitude`.
265
- let high_bit_is_set = remaining_magnitude & ( 1 << 57 ) != 0 ;
266
- if ( high_bit_is_set && sign_extension_bits != 0b111111 )
267
- || ( !high_bit_is_set && sign_extension_bits != 0 )
268
- {
269
- // If the sign extension bits don't agree with the top bit, this value required
270
- // more than 64 bits to encode.
271
- return IonResult :: decoding_error (
272
- "found a 10-byte FlexInt too large to fit in a i64" ,
273
- ) ;
274
- }
275
- } else {
276
- // This is an unsigned value; if any of the highest six bits are set, then this
277
- // value is beyond the magnitude we can store in a u64.
278
- if sign_extension_bits != 0 {
279
- return IonResult :: decoding_error (
280
- "found a 10-byte FlexUInt too large to fit in a u64" ,
281
- ) ;
282
- }
117
+ if num_encoded_bytes > input. len ( ) {
118
+ return incomplete ( ) ;
283
119
}
284
-
285
- // Shift the magnitude from the last 8 bytes over and combine it with the six bits we
286
- // carried over from the second byte.
287
- let value = ( remaining_magnitude << 6 ) | magnitude_low_six as u64 ;
288
- let flex_uint = FlexUInt :: new ( 10 , value) ;
289
- Ok ( flex_uint)
120
+ let mut buffer = [ 0u8 ; size_of :: < u128 > ( ) ] ;
121
+ ( & mut buffer[ ..num_encoded_bytes] ) . copy_from_slice ( & input[ ..num_encoded_bytes] ) ;
122
+ let big_value = u128:: from_le_bytes ( buffer) . wrapping_shr ( num_encoded_bytes as u32 ) ;
123
+ let value = big_value as u64 ;
124
+ Ok ( FlexUInt :: new ( num_encoded_bytes, value) )
290
125
}
291
126
292
127
#[ inline]
@@ -310,7 +145,7 @@ impl FlexUInt {
310
145
// can be encoded entirely within a u128, which offers native shifting and masking operations.
311
146
// FlexUInts are used to represent symbol/macro table addresses and byte lengths, so 112 bits of
312
147
// magnitude should be sufficient for all but the most extreme use cases.
313
- const MAX_FLEX_UINT_ENCODED_SIZE_IN_BYTES : usize = mem :: size_of :: < u128 > ( ) ;
148
+ const MAX_FLEX_UINT_ENCODED_SIZE_IN_BYTES : usize = size_of :: < u128 > ( ) ;
314
149
315
150
#[ inline]
316
151
pub fn write < W : Write > ( output : & mut W , value : impl Into < UInt > ) -> IonResult < usize > {
0 commit comments