Skip to content

Commit 0bb3a5b

Browse files
authored
Refactor: Persist pattern exactness in datagen (#6484)
1 parent 3697635 commit 0bb3a5b

File tree

2 files changed

+100
-62
lines changed

2 files changed

+100
-62
lines changed

components/datetime/src/provider/skeleton/helpers.rs

-41
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@ use crate::{
1919
},
2020
};
2121

22-
#[cfg(feature = "datagen")]
23-
use crate::provider::pattern::CoarseHourCycle;
24-
2522
// The following scalar values are for testing the suitability of a skeleton's field for the
2623
// given input. Per UTS 35, the better the fit of a pattern, the "lower the distance". In this
2724
// implementation each distance type is separated by an order of magnitude. This magnitude needs
@@ -519,41 +516,3 @@ pub fn get_best_available_format_pattern<'data>(
519516

520517
BestSkeleton::AllFieldsMatch(closest_format_pattern)
521518
}
522-
523-
impl components::Bag {
524-
#[doc(hidden)] // TODO(#4467): Internal
525-
#[cfg(feature = "datagen")]
526-
pub fn select_pattern<'data>(
527-
self,
528-
skeletons: &DateSkeletonPatterns<'data>,
529-
preferred_hour_cycle: CoarseHourCycle,
530-
length_patterns: &GenericLengthPatterns<'data>,
531-
) -> PatternPlurals<'data> {
532-
use crate::provider::pattern::runtime::Pattern;
533-
use icu_locale_core::preferences::extensions::unicode::keywords::HourCycle;
534-
535-
let default_hour_cycle = match preferred_hour_cycle {
536-
CoarseHourCycle::H11H12 => HourCycle::H12,
537-
CoarseHourCycle::H23 => HourCycle::H23,
538-
};
539-
let fields = self.to_vec_fields(default_hour_cycle);
540-
match create_best_pattern_for_fields(skeletons, length_patterns, &fields, &self, false) {
541-
BestSkeleton::AllFieldsMatch(p) => p,
542-
_ => {
543-
// Build a last-resort pattern that contains all of the requested fields.
544-
// This is NOT in the CLDR standard! Better would be:
545-
// - Use Append Items?
546-
// - Fall back to the format from the Gregorian or Generic calendar?
547-
// - Bubble up an error of some sort?
548-
// See issue: <https://github.com/unicode-org/icu4x/issues/586>
549-
let pattern_items = fields
550-
.into_iter()
551-
.flat_map(|field| [PatternItem::Literal(' '), PatternItem::Field(field)])
552-
.skip(1)
553-
.collect::<Vec<_>>();
554-
let pattern = Pattern::from(pattern_items);
555-
PatternPlurals::SinglePattern(pattern)
556-
}
557-
}
558-
}
559-
}

provider/source/src/datetime/neo_skeleton.rs

+100-21
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use icu::datetime::fieldsets::enums::*;
1010
use icu::datetime::options::Length;
1111
use icu::datetime::provider::calendar::{DateSkeletonPatterns, TimeLengths};
1212
use icu::datetime::provider::fields::components;
13-
use icu::datetime::provider::pattern::{reference, runtime};
13+
use icu::datetime::provider::pattern::{reference, runtime, CoarseHourCycle};
1414
use icu::datetime::provider::skeleton::PatternPlurals;
1515
use icu::datetime::provider::*;
1616
use icu::plurals::PluralElements;
@@ -19,6 +19,71 @@ use icu_provider::prelude::*;
1919

2020
use super::DatagenCalendar;
2121

22+
/// A value tagged with whether it was found as-is or had to be constructed.
///
/// `select_pattern` produces `Exact` when the best skeleton reports that all
/// requested fields match, and `Synthetic` otherwise (missing/extra fields, or
/// the hand-built last-resort pattern).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ExactOrSynthetic<T> {
    /// The wrapped value is an exact match.
    Exact(T),
    /// The wrapped value was adjusted or synthesized.
    Synthetic(T),
}

impl<T> ExactOrSynthetic<T> {
    /// Transforms the wrapped value with `f`, keeping the `Exact`/`Synthetic` tag.
    ///
    /// `f` is invoked exactly once, so any `FnOnce` closure is accepted (every
    /// `Fn`/`FnMut` closure still works).
    pub fn map<V>(self, f: impl FnOnce(T) -> V) -> ExactOrSynthetic<V> {
        match self {
            Self::Exact(t) => ExactOrSynthetic::Exact(f(t)),
            Self::Synthetic(t) => ExactOrSynthetic::Synthetic(f(t)),
        }
    }

    /// Borrows the wrapped value, ignoring the tag.
    pub fn inner(&self) -> &T {
        match self {
            Self::Exact(t) | Self::Synthetic(t) => t,
        }
    }

    /// Consumes the wrapper and returns the wrapped value, discarding the tag.
    pub fn into_inner(self) -> T {
        match self {
            Self::Exact(t) | Self::Synthetic(t) => t,
        }
    }
}
50+
51+
fn select_pattern<'data>(
52+
bag: components::Bag,
53+
skeletons: &DateSkeletonPatterns<'data>,
54+
preferred_hour_cycle: CoarseHourCycle,
55+
length_patterns: &GenericLengthPatterns<'data>,
56+
) -> ExactOrSynthetic<PatternPlurals<'data>> {
57+
use icu::datetime::provider::pattern::{runtime, PatternItem};
58+
use icu::datetime::provider::skeleton::{create_best_pattern_for_fields, BestSkeleton};
59+
use icu_locale_core::preferences::extensions::unicode::keywords::HourCycle;
60+
61+
let default_hour_cycle = match preferred_hour_cycle {
62+
CoarseHourCycle::H11H12 => HourCycle::H12,
63+
CoarseHourCycle::H23 => HourCycle::H23,
64+
};
65+
let fields = bag.to_vec_fields(default_hour_cycle);
66+
match create_best_pattern_for_fields(skeletons, length_patterns, &fields, &bag, false) {
67+
BestSkeleton::AllFieldsMatch(p) => ExactOrSynthetic::Exact(p),
68+
BestSkeleton::MissingOrExtraFields(p) => ExactOrSynthetic::Synthetic(p),
69+
BestSkeleton::NoMatch => {
70+
// Build a last-resort pattern that contains all of the requested fields.
71+
// This is NOT in the CLDR standard! Better would be:
72+
// - Use Append Items?
73+
// - Fall back to the format from the Gregorian or Generic calendar?
74+
// - Bubble up an error of some sort?
75+
// See issue: <https://github.com/unicode-org/icu4x/issues/586>
76+
let pattern_items = fields
77+
.into_iter()
78+
.flat_map(|field| [PatternItem::Literal(' '), PatternItem::Field(field)])
79+
.skip(1)
80+
.collect::<Vec<_>>();
81+
let pattern = runtime::Pattern::from(pattern_items);
82+
ExactOrSynthetic::Synthetic(PatternPlurals::SinglePattern(pattern))
83+
}
84+
}
85+
}
86+
2287
impl SourceDataProvider {
2388
fn load_neo_skeletons_key<M>(
2489
&self,
@@ -68,24 +133,25 @@ impl SourceDataProvider {
68133
DateSkeletonPatterns::from(&data.datetime_formats.available_formats);
69134

70135
fn expand_pp_to_pe(
71-
pp: PatternPlurals,
72-
) -> PluralElements<icu::datetime::provider::pattern::runtime::Pattern> {
73-
match pp {
136+
value: ExactOrSynthetic<PatternPlurals>,
137+
) -> ExactOrSynthetic<PluralElements<runtime::Pattern>> {
138+
value.map(|pp| match pp {
74139
PatternPlurals::MultipleVariants(variants) => PluralElements::new(variants.other)
75140
.with_zero_value(variants.zero.clone())
76141
.with_one_value(variants.one.clone())
77142
.with_two_value(variants.two.clone())
78143
.with_few_value(variants.few.clone())
79144
.with_many_value(variants.many.clone()),
80145
PatternPlurals::SinglePattern(pattern) => PluralElements::new(pattern),
81-
}
146+
})
82147
}
83148

84149
let [long, medium, short] = [Length::Long, Length::Medium, Length::Short]
85150
.map(|length| to_components_bag(length, attributes, &data))
86151
.map(|components| {
87152
// TODO: Use a Skeleton here in order to retain 'E' vs 'c'
88-
let pattern = expand_pp_to_pe(components.select_pattern(
153+
let pattern = expand_pp_to_pe(select_pattern(
154+
components,
89155
&skeleton_patterns,
90156
time_lengths_v1.preferred_hour_cycle,
91157
&length_combinations_v1,
@@ -105,12 +171,14 @@ impl SourceDataProvider {
105171
components_with_era.era = Some(components::Text::Short);
106172
(
107173
pattern,
108-
Some(expand_pp_to_pe(components_with_full_year.select_pattern(
174+
Some(expand_pp_to_pe(select_pattern(
175+
components_with_full_year,
109176
&skeleton_patterns,
110177
time_lengths_v1.preferred_hour_cycle,
111178
&length_combinations_v1,
112179
))),
113-
Some(expand_pp_to_pe(components_with_era.select_pattern(
180+
Some(expand_pp_to_pe(select_pattern(
181+
components_with_era,
114182
&skeleton_patterns,
115183
time_lengths_v1.preferred_hour_cycle,
116184
&length_combinations_v1,
@@ -125,12 +193,14 @@ impl SourceDataProvider {
125193
components_with_second.second = Some(components::Numeric::Numeric);
126194
(
127195
pattern,
128-
Some(expand_pp_to_pe(components_with_minute.select_pattern(
196+
Some(expand_pp_to_pe(select_pattern(
197+
components_with_minute,
129198
&skeleton_patterns,
130199
time_lengths_v1.preferred_hour_cycle,
131200
&length_combinations_v1,
132201
))),
133-
Some(expand_pp_to_pe(components_with_second.select_pattern(
202+
Some(expand_pp_to_pe(select_pattern(
203+
components_with_second,
134204
&skeleton_patterns,
135205
time_lengths_v1.preferred_hour_cycle,
136206
&length_combinations_v1,
@@ -142,31 +212,37 @@ impl SourceDataProvider {
142212
});
143213
let builder = PackedPatternsBuilder {
144214
standard: LengthPluralElements {
145-
long: long.0.as_ref().map(runtime::Pattern::as_ref),
146-
medium: medium.0.as_ref().map(runtime::Pattern::as_ref),
147-
short: short.0.as_ref().map(runtime::Pattern::as_ref),
215+
long: long.0.inner().as_ref().map(runtime::Pattern::as_ref),
216+
medium: medium.0.inner().as_ref().map(runtime::Pattern::as_ref),
217+
short: short.0.inner().as_ref().map(runtime::Pattern::as_ref),
148218
},
149219
variant0: Some(LengthPluralElements {
150220
long: long
151221
.1
152-
.unwrap_or(long.0.as_ref().map(runtime::Pattern::as_ref)),
222+
.map(|x| x.into_inner())
223+
.unwrap_or(long.0.inner().as_ref().map(runtime::Pattern::as_ref)),
153224
medium: medium
154225
.1
155-
.unwrap_or(medium.0.as_ref().map(runtime::Pattern::as_ref)),
226+
.map(|x| x.into_inner())
227+
.unwrap_or(medium.0.inner().as_ref().map(runtime::Pattern::as_ref)),
156228
short: short
157229
.1
158-
.unwrap_or(short.0.as_ref().map(runtime::Pattern::as_ref)),
230+
.map(|x| x.into_inner())
231+
.unwrap_or(short.0.inner().as_ref().map(runtime::Pattern::as_ref)),
159232
}),
160233
variant1: Some(LengthPluralElements {
161234
long: long
162235
.2
163-
.unwrap_or(long.0.as_ref().map(runtime::Pattern::as_ref)),
236+
.map(|x| x.into_inner())
237+
.unwrap_or(long.0.inner().as_ref().map(runtime::Pattern::as_ref)),
164238
medium: medium
165239
.2
166-
.unwrap_or(medium.0.as_ref().map(runtime::Pattern::as_ref)),
240+
.map(|x| x.into_inner())
241+
.unwrap_or(medium.0.inner().as_ref().map(runtime::Pattern::as_ref)),
167242
short: short
168243
.2
169-
.unwrap_or(short.0.as_ref().map(runtime::Pattern::as_ref)),
244+
.map(|x| x.into_inner())
245+
.unwrap_or(short.0.inner().as_ref().map(runtime::Pattern::as_ref)),
170246
}),
171247
};
172248
Ok(builder.build())
@@ -650,11 +726,14 @@ mod date_skeleton_consistency_tests {
650726
// TODO: Use a Skeleton here in order to retain 'E' vs 'c'
651727
let parsed_skeleton: reference::Pattern = info.skeleton.parse().unwrap();
652728
let components = components::Bag::from(&parsed_skeleton);
653-
let selected_pattern = match components.select_pattern(
729+
let selected_pattern = match select_pattern(
730+
components,
654731
data.skeleton_patterns,
655732
data.preferred_hour_cycle,
656733
data.length_combinations_v1,
657-
) {
734+
)
735+
.into_inner()
736+
{
658737
PatternPlurals::SinglePattern(x) => x,
659738
PatternPlurals::MultipleVariants(_) => unreachable!(),
660739
};

0 commit comments

Comments
 (0)