@@ -468,97 +468,163 @@ impl DatagenDriver {
468
468
}
469
469
}
470
470
471
+ struct ExplicitImplicitLocaleSets {
472
+ explicit : HashSet < DataLocale > ,
473
+ implicit : HashSet < DataLocale > ,
474
+ }
475
+
476
+ /// Resolves the set of explicit langids and the supported locales into two sets of locales:
477
+ ///
478
+ /// - `explicit` contains the explicit langids but with aux keys and extension keywords included.
479
+ /// For example, if `ar-SA` is requested (explicit langid), and `ar` and `ar-u-nu-latn` are supported,
480
+ /// then `ar-SA` and `ar-SA-u-nu-latn` will be returned as `explicit`.
481
+ /// - `implcit` contains all supported locales reachable by fallback from an `explicit` locale.
482
+ /// These locales can be included without increasing data payload size.
483
+ fn make_explicit_implicit_sets (
484
+ key : DataKey ,
485
+ explicit_langids : & HashSet < LanguageIdentifier > ,
486
+ supported_map : & HashMap < LanguageIdentifier , HashSet < DataLocale > > ,
487
+ fallbacker : & Lazy <
488
+ Result < LocaleFallbacker , DataError > ,
489
+ impl FnOnce ( ) -> Result < LocaleFallbacker , DataError > ,
490
+ > ,
491
+ ) -> Result < ExplicitImplicitLocaleSets , DataError > {
492
+ let mut implicit = HashSet :: new ( ) ;
493
+ // TODO: Make including the default locale configurable
494
+ implicit. insert ( DataLocale :: default ( ) ) ;
495
+
496
+ let mut explicit: HashSet < DataLocale > = Default :: default ( ) ;
497
+ for explicit_langid in explicit_langids. iter ( ) {
498
+ explicit. insert ( explicit_langid. into ( ) ) ;
499
+ if let Some ( locales) = supported_map. get ( explicit_langid) {
500
+ explicit. extend ( locales. iter ( ) . cloned ( ) ) ; // adds ar-EG-u-nu-latn
501
+ }
502
+ if explicit_langid == & LanguageIdentifier :: UND {
503
+ continue ;
504
+ }
505
+ let fallbacker = fallbacker. as_ref ( ) . map_err ( |e| * e) ?;
506
+ let fallbacker_with_config = fallbacker. for_config ( key. fallback_config ( ) ) ;
507
+ let mut iter = fallbacker_with_config. fallback_for ( explicit_langid. into ( ) ) ;
508
+ while !iter. get ( ) . is_und ( ) {
509
+ implicit. insert ( iter. get ( ) . clone ( ) ) ;
510
+ // Inherit aux keys and extension keywords from parent locales
511
+ let iter_langid = iter. get ( ) . get_langid ( ) ;
512
+ if let Some ( locales) = supported_map. get ( & iter_langid) {
513
+ implicit. extend ( locales. iter ( ) . cloned ( ) ) ; // adds ar-u-nu-latn
514
+ for locale in locales {
515
+ let mut morphed_locale = locale. clone ( ) ;
516
+ morphed_locale. set_langid ( explicit_langid. clone ( ) ) ;
517
+ explicit. insert ( morphed_locale) ; // adds ar-SA-u-nu-latn
518
+ }
519
+ }
520
+ iter. step ( ) ;
521
+ }
522
+ }
523
+ Ok ( ExplicitImplicitLocaleSets { explicit, implicit } )
524
+ }
525
+
471
526
/// Selects the maximal set of locales to export based on a [`DataKey`] and this datagen
472
527
/// provider's options bag. The locales may be later optionally deduplicated for fallback.
473
528
fn select_locales_for_key (
474
529
provider : & dyn ExportableProvider ,
475
530
key : DataKey ,
476
531
fallback : FallbackMode ,
477
- locales : Option < & HashSet < LanguageIdentifier > > ,
532
+ explicit_langids : Option < & HashSet < LanguageIdentifier > > ,
478
533
additional_collations : & HashSet < String > ,
479
534
segmenter_models : & [ String ] ,
480
535
fallbacker : & Lazy <
481
536
Result < LocaleFallbacker , DataError > ,
482
537
impl FnOnce ( ) -> Result < LocaleFallbacker , DataError > ,
483
538
> ,
484
539
) -> Result < HashSet < icu_provider:: DataLocale > , DataError > {
485
- let mut result = provider
540
+ // A map from langid to data locales. Keys that have aux keys or extension keywords
541
+ // may have multiple data locales per langid.
542
+ let mut supported_map: HashMap < LanguageIdentifier , HashSet < DataLocale > > = Default :: default ( ) ;
543
+ for locale in provider
486
544
. supported_locales_for_key ( key)
487
545
. map_err ( |e| e. with_key ( key) ) ?
488
- . into_iter ( )
489
- . collect :: < HashSet < DataLocale > > ( ) ;
546
+ {
547
+ use std:: collections:: hash_map:: Entry ;
548
+ match supported_map. entry ( locale. get_langid ( ) ) {
549
+ Entry :: Occupied ( mut entry) => entry. get_mut ( ) . insert ( locale) ,
550
+ Entry :: Vacant ( entry) => entry. insert ( Default :: default ( ) ) . insert ( locale) ,
551
+ } ;
552
+ }
490
553
491
554
if key == icu_segmenter:: provider:: DictionaryForWordOnlyAutoV1Marker :: KEY
492
555
|| key == icu_segmenter:: provider:: DictionaryForWordLineExtendedV1Marker :: KEY
493
556
{
494
- result. retain ( |locale| {
495
- let model = crate :: transform:: segmenter:: dictionary:: data_locale_to_model_name ( locale) ;
496
- segmenter_models. iter ( ) . any ( |m| Some ( m. as_ref ( ) ) == model)
557
+ supported_map. retain ( |_, locales| {
558
+ locales. retain ( |locale| {
559
+ let model =
560
+ crate :: transform:: segmenter:: dictionary:: data_locale_to_model_name ( locale) ;
561
+ segmenter_models. iter ( ) . any ( |m| Some ( m. as_ref ( ) ) == model)
562
+ } ) ;
563
+ !locales. is_empty ( )
497
564
} ) ;
498
565
// Don't perform additional locale filtering
499
- return Ok ( result ) ;
566
+ return Ok ( supported_map . into_values ( ) . flatten ( ) . collect ( ) ) ;
500
567
} else if key == icu_segmenter:: provider:: LstmForWordLineAutoV1Marker :: KEY {
501
- result. retain ( |locale| {
502
- let model = crate :: transform:: segmenter:: lstm:: data_locale_to_model_name ( locale) ;
503
- segmenter_models. iter ( ) . any ( |m| Some ( m. as_ref ( ) ) == model)
568
+ supported_map. retain ( |_, locales| {
569
+ locales. retain ( |locale| {
570
+ let model = crate :: transform:: segmenter:: lstm:: data_locale_to_model_name ( locale) ;
571
+ segmenter_models. iter ( ) . any ( |m| Some ( m. as_ref ( ) ) == model)
572
+ } ) ;
573
+ !locales. is_empty ( )
504
574
} ) ;
505
575
// Don't perform additional locale filtering
506
- return Ok ( result ) ;
576
+ return Ok ( supported_map . into_values ( ) . flatten ( ) . collect ( ) ) ;
507
577
} else if key == icu_collator:: provider:: CollationDataV1Marker :: KEY
508
578
|| key == icu_collator:: provider:: CollationDiacriticsV1Marker :: KEY
509
579
|| key == icu_collator:: provider:: CollationJamoV1Marker :: KEY
510
580
|| key == icu_collator:: provider:: CollationMetadataV1Marker :: KEY
511
581
|| key == icu_collator:: provider:: CollationReorderingV1Marker :: KEY
512
582
|| key == icu_collator:: provider:: CollationSpecialPrimariesV1Marker :: KEY
513
583
{
514
- result. retain ( |locale| {
515
- let Some ( collation) = locale
516
- . get_unicode_ext ( & key ! ( "co" ) )
517
- . and_then ( |co| co. as_single_subtag ( ) . copied ( ) )
518
- else {
519
- return true ;
520
- } ;
521
- additional_collations. contains ( collation. as_str ( ) )
522
- || if collation. starts_with ( "search" ) {
523
- additional_collations. contains ( "search*" )
524
- } else {
525
- ![ "big5han" , "gb2312" ] . contains ( & collation. as_str ( ) )
526
- }
584
+ supported_map. retain ( |_, locales| {
585
+ locales. retain ( |locale| {
586
+ let Some ( collation) = locale
587
+ . get_unicode_ext ( & key ! ( "co" ) )
588
+ . and_then ( |co| co. as_single_subtag ( ) . copied ( ) )
589
+ else {
590
+ return true ;
591
+ } ;
592
+ additional_collations. contains ( collation. as_str ( ) )
593
+ || if collation. starts_with ( "search" ) {
594
+ additional_collations. contains ( "search*" )
595
+ } else {
596
+ ![ "big5han" , "gb2312" ] . contains ( & collation. as_str ( ) )
597
+ }
598
+ } ) ;
599
+ !locales. is_empty ( )
527
600
} ) ;
528
601
}
529
602
530
- result = match ( locales , fallback) {
603
+ let result = match ( explicit_langids , fallback) {
531
604
// Case 1: `None` simply exports all supported locales for this key.
532
- ( None , _) => result ,
605
+ ( None , _) => supported_map . into_values ( ) . flatten ( ) . collect ( ) ,
533
606
// Case 2: `FallbackMode::Preresolved` exports all supported locales whose langid matches
534
607
// one of the explicit locales. This ensures extensions are included. In addition, any
535
608
// explicit locales are added to the list, even if they themselves don't contain data;
536
609
// fallback should be performed upon exporting.
537
- ( Some ( explicit ) , FallbackMode :: Preresolved ) => result
538
- . into_iter ( )
539
- . chain ( explicit . iter ( ) . map ( |langid| langid . into ( ) ) )
540
- . filter ( |locale| explicit. contains ( & locale . get_langid ( ) ) )
541
- . collect ( ) ,
610
+ ( Some ( explicit_langids ) , FallbackMode :: Preresolved ) => {
611
+ let ExplicitImplicitLocaleSets { explicit , .. } =
612
+ make_explicit_implicit_sets ( key , explicit_langids , & supported_map , fallbacker ) ? ;
613
+ explicit
614
+ }
542
615
// Case 3: All other modes resolve to the "ancestors and descendants" strategy.
543
- ( Some ( explicit ) , _) => {
544
- let include_und = explicit . contains ( & LanguageIdentifier :: UND ) ;
545
- let explicit : HashSet < DataLocale > = explicit . iter ( ) . map ( DataLocale :: from ) . collect ( ) ;
546
- let mut implicit = HashSet :: new ( ) ;
547
- // TODO: Make including the default locale configurable
548
- implicit . insert ( DataLocale :: default ( ) ) ;
616
+ ( Some ( explicit_langids ) , _) => {
617
+ let include_und = explicit_langids . contains ( & LanguageIdentifier :: UND ) ;
618
+
619
+ let ExplicitImplicitLocaleSets { explicit , implicit } =
620
+ make_explicit_implicit_sets ( key , explicit_langids , & supported_map , fallbacker ) ? ;
621
+
549
622
let fallbacker = fallbacker. as_ref ( ) . map_err ( |e| * e) ?;
550
623
let fallbacker_with_config = fallbacker. for_config ( key. fallback_config ( ) ) ;
551
624
552
- for locale in explicit. iter ( ) {
553
- let mut iter = fallbacker_with_config. fallback_for ( locale. clone ( ) ) ;
554
- while !iter. get ( ) . is_und ( ) {
555
- implicit. insert ( iter. get ( ) . clone ( ) ) ;
556
- iter. step ( ) ;
557
- }
558
- }
559
-
560
- result
561
- . into_iter ( )
625
+ supported_map
626
+ . into_values ( )
627
+ . flatten ( )
562
628
. chain ( explicit. iter ( ) . cloned ( ) )
563
629
. filter ( |locale_orig| {
564
630
let mut locale = locale_orig. clone ( ) ;
@@ -705,7 +771,7 @@ fn test_collation_filtering() {
705
771
Some ( & HashSet :: from_iter ( [ cas. language . clone ( ) ] ) ) ,
706
772
& HashSet :: from_iter ( cas. include_collations . iter ( ) . copied ( ) . map ( String :: from) ) ,
707
773
& [ ] ,
708
- & once_cell:: sync:: Lazy :: new ( || unreachable ! ( ) ) ,
774
+ & once_cell:: sync:: Lazy :: new ( || Ok ( LocaleFallbacker :: new_without_data ( ) ) ) ,
709
775
)
710
776
. unwrap ( )
711
777
. into_iter ( )
0 commit comments