Skip to content

Commit b66a901

Browse files
committed
validate to avoid zv indexing
1 parent 42f77c9 commit b66a901

File tree

3 files changed

+106
-64
lines changed

3 files changed

+106
-64
lines changed

components/collator/src/comparison.rs

Lines changed: 49 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -35,16 +35,18 @@ use crate::provider::CollationMetadataV1;
3535
use crate::provider::CollationReordering;
3636
use crate::provider::CollationReorderingV1;
3737
use crate::provider::CollationRootV1;
38-
use crate::provider::CollationSpecialPrimaries;
3938
use crate::provider::CollationSpecialPrimariesV1;
39+
use crate::provider::CollationSpecialPrimariesValidated;
4040
use crate::provider::CollationTailoringV1;
41+
use core::array;
4142
use core::cmp::Ordering;
4243
use core::convert::TryFrom;
4344
use icu_normalizer::provider::DecompositionData;
4445
use icu_normalizer::provider::DecompositionTables;
4546
use icu_normalizer::provider::NormalizerNfdDataV1;
4647
use icu_normalizer::provider::NormalizerNfdTablesV1;
4748
use icu_normalizer::Decomposition;
49+
use icu_provider::marker::ErasedMarker;
4850
use icu_provider::prelude::*;
4951
use smallvec::SmallVec;
5052
use utf16_iter::Utf16CharsEx;
@@ -379,7 +381,7 @@ impl LocaleSpecificDataHolder {
379381
/// Compares strings according to culturally-relevant ordering.
380382
#[derive(Debug)]
381383
pub struct Collator {
382-
special_primaries: DataPayload<CollationSpecialPrimariesV1>,
384+
special_primaries: DataPayload<ErasedMarker<CollationSpecialPrimariesValidated<'static>>>,
383385
root: DataPayload<CollationRootV1>,
384386
tailoring: Option<DataPayload<CollationTailoringV1>>,
385387
jamo: DataPayload<CollationJamoV1>,
@@ -464,7 +466,7 @@ impl Collator {
464466
decompositions: DataPayload<NormalizerNfdDataV1>,
465467
tables: DataPayload<NormalizerNfdTablesV1>,
466468
jamo: DataPayload<CollationJamoV1>,
467-
mut special_primaries: DataPayload<CollationSpecialPrimariesV1>,
469+
special_primaries: DataPayload<CollationSpecialPrimariesV1>,
468470
prefs: CollatorPreferences,
469471
options: CollatorOptions,
470472
) -> Result<Self, DataError>
@@ -489,24 +491,35 @@ impl Collator {
489491
if special_primaries.get().last_primaries.len() <= (MaxVariable::Currency as usize) {
490492
return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
491493
}
492-
if special_primaries.get().last_primaries.len() == (MaxVariable::Currency as usize) {
493-
// Data without compressible bits, add hardcoded data
494-
special_primaries = special_primaries.map_project(|csp, _| CollationSpecialPrimaries {
495-
last_primaries: csp
496-
.last_primaries
497-
.iter()
498-
.chain(
499-
CollationSpecialPrimaries::HARDCODED_FALLBACK
500-
.last_primaries
501-
.iter()
502-
.rev()
503-
.take(4)
504-
.rev(),
494+
let special_primaries = special_primaries.map_project(|csp, _| {
495+
if csp.last_primaries.len()
496+
== (MaxVariable::Currency as usize)
497+
+ core::mem::size_of_val(
498+
&CollationSpecialPrimariesValidated::HARDCODED_FALLBACK.compressible_bytes,
505499
)
506-
.collect(),
507-
..csp
508-
});
509-
}
500+
{
501+
CollationSpecialPrimariesValidated {
502+
compressible_bytes: array::from_fn(|i| {
503+
#[allow(clippy::unwrap_used)] // protected by the if
504+
{
505+
csp.last_primaries
506+
.get((MaxVariable::Currency as usize) + i)
507+
.unwrap()
508+
}
509+
}),
510+
last_primaries: csp.last_primaries.truncated(MaxVariable::Currency as usize),
511+
numeric_primary: csp.numeric_primary,
512+
}
513+
} else {
514+
// Data without compressible bytes, add hardcoded data
515+
CollationSpecialPrimariesValidated {
516+
last_primaries: csp.last_primaries,
517+
compressible_bytes: CollationSpecialPrimariesValidated::HARDCODED_FALLBACK
518+
.compressible_bytes,
519+
numeric_primary: csp.numeric_primary,
520+
}
521+
}
522+
});
510523

511524
Ok(Collator {
512525
special_primaries,
@@ -550,7 +563,7 @@ macro_rules! compare {
550563
/// borrowed version.
551564
#[derive(Debug)]
552565
pub struct CollatorBorrowed<'a> {
553-
special_primaries: &'a CollationSpecialPrimaries<'a>,
566+
special_primaries: &'a CollationSpecialPrimariesValidated<'a>,
554567
root: &'a CollationData<'a>,
555568
tailoring: Option<&'a CollationData<'a>>,
556569
jamo: &'a CollationJamo<'a>,
@@ -586,28 +599,26 @@ impl CollatorBorrowed<'static> {
586599
return Err(DataError::custom("invalid").with_marker(CollationJamoV1::INFO));
587600
}
588601

589-
let mut special_primaries =
590-
crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1;
602+
let special_primaries = crate::provider::Baked::SINGLETON_COLLATION_SPECIAL_PRIMARIES_V1;
591603
// `variant_count` isn't stable yet:
592604
// https://github.com/rust-lang/rust/issues/73662
593605
if special_primaries.last_primaries.len() <= (MaxVariable::Currency as usize) {
594606
return Err(DataError::custom("invalid").with_marker(CollationSpecialPrimariesV1::INFO));
607+
} else if CollationSpecialPrimariesValidated::HARDCODED_FALLBACK.numeric_primary
608+
!= special_primaries.numeric_primary
609+
|| CollationSpecialPrimariesValidated::HARDCODED_FALLBACK
610+
.last_primaries
611+
.iter()
612+
.zip(special_primaries.last_primaries.iter())
613+
.any(|(a, b)| a != b)
614+
{
615+
// Baked data without compressible bits, but not matching hardcoded data
616+
return Err(
617+
DataError::custom("cannot fall back to hardcoded compressible data")
618+
.with_marker(CollationSpecialPrimariesV1::INFO),
619+
);
595620
}
596-
if special_primaries.last_primaries.len() == (MaxVariable::Currency as usize) {
597-
// Baked data without compressible bits, use hardcoded data
598-
if CollationSpecialPrimaries::HARDCODED_FALLBACK.numeric_primary
599-
!= special_primaries.numeric_primary
600-
|| CollationSpecialPrimaries::HARDCODED_FALLBACK
601-
.last_primaries
602-
.iter()
603-
.zip(special_primaries.last_primaries.iter())
604-
.any(|(a, b)| a != b)
605-
{
606-
return Err(DataError::custom("cannot fall back to compressible data")
607-
.with_marker(CollationSpecialPrimariesV1::INFO));
608-
}
609-
special_primaries = CollationSpecialPrimaries::HARDCODED_FALLBACK;
610-
}
621+
let special_primaries = CollationSpecialPrimariesValidated::HARDCODED_FALLBACK;
611622

612623
// Attribute belongs closer to `unwrap`, but
613624
// https://github.com/rust-lang/rust/issues/15701

components/collator/src/provider.rs

Lines changed: 39 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -566,33 +566,49 @@ pub struct CollationSpecialPrimaries<'data> {
566566
pub numeric_primary: u8,
567567
}
568568

569-
impl CollationSpecialPrimaries<'static> {
569+
/// Crate-internal form of the special primaries in which the
/// compressible-byte bitmap is held in a fixed-size array
/// (`compressible_bytes`) instead of being part of the
/// `last_primaries` vector.
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
570+
pub(crate) struct CollationSpecialPrimariesValidated<'data> {
571+
/// The primaries corresponding to `MaxVariable`
572+
/// character classes packed so that each fits in
573+
/// 16 bits. Length must match the number of enum
574+
/// variants in `MaxVariable`, currently 4.
575+
pub last_primaries: ZeroVec<'data, u16>,
576+
/// The high 8 bits of the numeric primary
577+
pub numeric_primary: u8,
578+
/// 256 bits (packed in 16 u16s) to classify every possible
579+
/// byte into compressible or non-compressible.
580+
pub compressible_bytes: [u16; 16],
581+
}
582+
583+
impl CollationSpecialPrimariesValidated<'static> {
570584
pub(crate) const HARDCODED_FALLBACK: &Self = &Self {
571585
last_primaries: zerovec::zerovec!(u16; <u16 as AsULE>::ULE::from_aligned; [
572586
// Last primaries
573587
1286,
574588
3072,
575589
3488,
576590
3840,
577-
// Compressible bytes
578-
0b0000_0000_0000_0000,
579-
0b0000_0000_0000_0000,
580-
0b0000_0000_0000_0000,
581-
0b0000_0000_0000_0000,
582-
0b0000_0000_0000_0000,
583-
0b0000_0000_0000_0000,
584-
0b1111_1111_1111_1110,
585-
0b1111_1111_1111_1111,
586-
0b0000_0000_0000_0001,
587-
0b0000_0000_0000_0000,
588-
0b0000_0000_0000_0000,
589-
0b0000_0000_0000_0000,
590-
0b0000_0000_0000_0000,
591-
0b0000_0000_0000_0000,
592-
0b0000_0000_0000_0000,
593-
0b0100_0000_0000_0000
594591
]),
595592
numeric_primary: 16u8,
593+
compressible_bytes: [
594+
// Compressible bytes
595+
0b0000_0000_0000_0000,
596+
0b0000_0000_0000_0000,
597+
0b0000_0000_0000_0000,
598+
0b0000_0000_0000_0000,
599+
0b0000_0000_0000_0000,
600+
0b0000_0000_0000_0000,
601+
0b1111_1111_1111_1110,
602+
0b1111_1111_1111_1111,
603+
0b0000_0000_0000_0001,
604+
0b0000_0000_0000_0000,
605+
0b0000_0000_0000_0000,
606+
0b0000_0000_0000_0000,
607+
0b0000_0000_0000_0000,
608+
0b0000_0000_0000_0000,
609+
0b0000_0000_0000_0000,
610+
0b0100_0000_0000_0000,
611+
],
596612
};
597613
}
598614

@@ -601,7 +617,7 @@ icu_provider::data_struct!(
601617
#[cfg(feature = "datagen")]
602618
);
603619

604-
impl CollationSpecialPrimaries<'_> {
620+
impl CollationSpecialPrimariesValidated<'_> {
605621
#[allow(clippy::unwrap_used)]
606622
pub(crate) fn last_primary_for_group(&self, max_variable: MaxVariable) -> u32 {
607623
// `unwrap` is OK, because `Collator::try_new` validates the length.
@@ -613,15 +629,12 @@ impl CollationSpecialPrimaries<'_> {
613629

614630
#[allow(dead_code)]
615631
pub(crate) fn is_compressible(&self, b: u8) -> bool {
616-
// Unwrap OK by construction and pasting this
632+
// Indexing is OK by construction (`b >> 4` is at most 15 and the array has 16 entries), and pasting this
617633
// into Compiler Explorer shows that the panic
618634
// is optimized away.
619-
#[allow(clippy::unwrap_used)]
620-
let field = self
621-
.last_primaries
622-
.get(self.last_primaries.len() - 4 + usize::from(b >> 4))
623-
.unwrap();
624-
let mask = 1 << (b & 0b111_1111);
635+
#[allow(clippy::indexing_slicing)]
636+
let field = self.compressible_bytes[usize::from(b >> 4)];
637+
let mask = 1 << (b & 0b1111);
625638
(field & mask) != 0
626639
}
627640
}

utils/zerovec/src/zerovec/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,17 @@ impl<U> EyepatchHackVector<U> {
165165
// this always represents a valid vector
166166
Vec::from_raw_parts(self.buf.as_ptr() as *mut U, len, self.capacity)
167167
}
168+
169+
/// Shortens the stored slice to at most `max` elements. A `max` that is
/// greater than or equal to the current length leaves the slice unchanged.
fn truncate(&mut self, max: usize) {
170+
// SAFETY: The elements in buf are `ULE`, so they don't need to be dropped
171+
// even if we own them. The new length is clamped with `min`, so it never
// exceeds the current length and the slice stays within the existing buffer.
172+
self.buf = unsafe {
173+
NonNull::new_unchecked(core::ptr::slice_from_raw_parts_mut(
174+
self.buf.as_mut().as_mut_ptr(),
175+
core::cmp::min(max, self.buf.as_ref().len()),
176+
))
177+
};
178+
}
168179
}
169180

170181
#[cfg(feature = "alloc")]
@@ -1068,6 +1079,13 @@ where
10681079
Cow::Borrowed(slice)
10691080
}
10701081
}
1082+
1083+
/// Truncates this vector to `min(self.len(), max)`.
///
/// Takes the vector by value, calls the inner vector's `truncate(max)`,
/// and returns the same value.
///
/// NOTE(review): the length clamping is done entirely by the inner
/// `truncate`; confirm that it clamps with `min` (not `max`) so that this
/// documented contract holds.
1084+
#[inline]
1085+
pub fn truncated(mut self, max: usize) -> Self {
1086+
self.vector.truncate(max);
1087+
self
1088+
}
10711089
}
10721090

10731091
#[cfg(feature = "alloc")]

0 commit comments

Comments
 (0)