Skip to content

Commit 5025948

Browse files
authored
CLDR-18712 Don't include grammar for new units (#4784)
1 parent 43b8169 commit 5025948

File tree

2 files changed

+158
-12
lines changed

2 files changed

+158
-12
lines changed

tools/cldr-code/src/main/java/org/unicode/cldr/util/GrammarInfo.java

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -758,8 +758,29 @@ public static Set<String> getGrammarLocales() {
758758
"knot", // US/UK specific
759759
"astronomical-unit", // specialized
760760
"dalton", // specialized
761-
"electronvolt" // specialized
762-
);
761+
"electronvolt", // specialized
762+
763+
// specialized
764+
"g-force",
765+
"steradian",
766+
"katal",
767+
"ofglucose",
768+
"part",
769+
"coulomb",
770+
"farad",
771+
"henry",
772+
"siemens",
773+
"becquerel",
774+
"calorie-it",
775+
"gray",
776+
"sievert",
777+
"kilogram-force",
778+
"em",
779+
"tesla",
780+
"weber",
781+
"ofhg",
782+
"light-speed",
783+
"fluid-ounce-metric");
763784

764785
public static Set<String> getSpecialsToTranslate() {
765786
return INCLUDE_OTHER;
@@ -770,11 +791,12 @@ public static Set<String> getSpecialsToTranslate() {
770791
/** Internal class for thread-safety */
771792
static class UnitsToAddGrammar {
772793
static final Set<String> data;
794+
static final Set<String> skipped;
773795

774796
static {
775797
final CLDRConfig config = CLDRConfig.getInstance();
776798
final UnitConverter converter = config.getSupplementalDataInfo().getUnitConverter();
777-
Set<String> missing = new TreeSet<>();
799+
Set<String> _skipped = new TreeSet<>();
778800
Set<String> _data = new TreeSet<>();
779801
for (String path :
780802
With.in(
@@ -784,22 +806,31 @@ static class UnitsToAddGrammar {
784806
String unit = parts.getAttributeValue(3, "type");
785807
// Add simple units
786808
String shortUnit = converter.getShortId(unit);
809+
787810
if (INCLUDE_OTHER.contains(shortUnit)) {
788811
_data.add(unit);
789812
continue;
790813
}
791-
if (!EXCLUDE_GRAMMAR.contains(shortUnit)) {
792-
Set<UnitSystem> systems = converter.getSystemsEnum(shortUnit);
793-
// we now add all SI and metric and si_acceptable and metric_adjacent
794-
if (!Collections.disjoint(systems, UnitSystem.SiOrMetric)) {
795-
_data.add(unit);
796-
continue;
797-
}
814+
815+
if (EXCLUDE_GRAMMAR.contains(shortUnit)) {
816+
_skipped.add(unit);
817+
continue;
798818
}
799-
missing.add(unit);
819+
820+
// we now add all SI and metric and si_acceptable and metric_adjacent
821+
822+
Set<UnitSystem> systems = converter.getSystemsEnum(shortUnit);
823+
if (!Collections.disjoint(systems, UnitSystem.SiOrMetric)) {
824+
_data.add(unit);
825+
continue;
826+
}
827+
828+
// and skip the rest
829+
830+
_skipped.add(unit);
800831
}
801832
if (DEBUG)
802-
for (String unit : missing) {
833+
for (String unit : _skipped) {
803834
String shortUnit = converter.getShortId(unit);
804835
System.out.println(
805836
"*Skipping\t"
@@ -812,11 +843,17 @@ static class UnitsToAddGrammar {
812843
+ (converter.isSimple(shortUnit) ? "SIMPLE" : ""));
813844
}
814845
data = ImmutableSet.copyOf(_data);
846+
skipped = ImmutableSet.copyOf(_skipped);
815847
}
816848
}
817849

818850
/**
 * Return the units that we should get grammar information for.
 *
 * <p>The result is the immutable set built once by the static initializer of {@code
 * UnitsToAddGrammar}; its elements are the {@code type} attribute values of the unit paths
 * examined there.
 */
819851
public static Set<String> getUnitsToAddGrammar() {
820852
return UnitsToAddGrammar.data;
821853
}
854+
855+
/**
 * Return the units that we should NOT get grammar information for.
 *
 * <p>This is the immutable set of units skipped by the static initializer of {@code
 * UnitsToAddGrammar}: units whose short ID is not in {@code INCLUDE_OTHER} and is either listed
 * in {@code EXCLUDE_GRAMMAR} or belongs to no system in {@code UnitSystem.SiOrMetric}.
 */
856+
public static Set<String> getUnitsToSkipGrammar() {
857+
return UnitsToAddGrammar.skipped;
858+
}
822859
}

tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4758,4 +4758,113 @@ private Multimap<Level, String> getCoverage(String locale, String xpathPrefix) {
47584758
}
47594759
return result;
47604760
}
4761+
4762+
// for ALL units, should have paths for each unit X
4763+
// ldml/units/unitLength[@type="long"]/unit[@type="X"]/gender
4764+
// ldml/units/unitLength[@type="long"]/unit[@type="X"]/displayName
4765+
// ldml/units/unitLength[@type="short"]/unit[@type="X"]/displayName
4766+
// ldml/units/unitLength[@type="narrow"]/unit[@type="X"]/displayName
4767+
// ldml/units/unitLength[@type="long"]/unit[@type="X"]/unitPattern[@count="other"]
4768+
4769+
// will also have prefix units (long, narrow, short) (all prefixes)
4770+
// ldml/units/unitLength[@type="long"]/compoundUnit[@type="10p-1"]/unitPrefixPattern
4771+
4772+
// will also have per & times units (long, narrow, short)
4773+
// ldml/units/unitLength[@type="long"]/compoundUnit[@type="per"]/compoundUnitPattern
4774+
// ldml/units/unitLength[@type="long"]/compoundUnit[@type="times"]/compoundUnitPattern
4775+
4776+
// will also have long power2/power3 in all available plurals/case
4777+
// ldml/units/unitLength[@type="long"]/compoundUnit[@type="power2"]/compoundUnitPattern1[@count="one"][@gender="feminine"]
4778+
// ldml/units/unitLength[@type="long"]/compoundUnit[@type="power3"]/compoundUnitPattern1[@count="one"][@gender="feminine"]
4779+
4780+
// and short/narrow power2/3, but only for plurals, no case
4781+
// ldml/units/unitLength[@type="short"]/compoundUnit[@type="power2"]/compoundUnitPattern1[@count="one"]
4782+
// ldml/units/unitLength[@type="narrow"]/compoundUnit[@type="power2"]/compoundUnitPattern1[@count="one"]
4783+
4784+
// For ALL units, if there are plurals for the locale, will have other counts
4785+
// ldml/units/unitLength[@type="long"]/unit[@type="X"]/unitPattern[@count="Y"]
4786+
4787+
// For CORE units, if there is grammar for the locale, will have other cases, eg
4788+
// ldml/units/unitLength[@type="long"]/unit[@type="volume-cubic-meter"]/unitPattern[@count="one"][@case="dative"]
4789+
4790+
// For non-CORE units, even if there is grammar for the locale, we won't have paths
4791+
// ldml/units/unitLength[@type="long"]/unit[@type="volume-fluid-ounce-metric"]/unitPattern[@count="one"][@case="dative"]
4792+
4793+
/**
 * Expected grammar treatment of a unit; used as the key of the expectation table in
 * testSkippedUnitsForGrammer.
 */
private static enum GrammarStatus {
4794+
// units expected to get grammar paths (assertion message: "Should be present always")
always,
4795+
// units that must not appear in GrammarInfo.getUnitsToAddGrammar()
never
4796+
}
4797+
4798+
public void testSkippedUnitsForGrammer() {
4799+
4800+
// Note: the list for 'never' also includes the enOrJaOnly units (see above in this file).
4801+
4802+
final Multimap<GrammarStatus, String> statusToLongUnit =
4803+
ImmutableMultimap.<GrammarStatus, String>builder()
4804+
.putAll(
4805+
GrammarStatus.never,
4806+
"angle-steradian",
4807+
"area-bu-jp",
4808+
"area-cho",
4809+
"area-se-jp",
4810+
"concentr-katal",
4811+
"concentr-ofglucose",
4812+
"concentr-part",
4813+
"concentr-portion",
4814+
"duration-fortnight",
4815+
"electric-coulomb",
4816+
"electric-farad",
4817+
"electric-henry",
4818+
"electric-siemens",
4819+
"energy-becquerel",
4820+
"energy-british-thermal-unit-it",
4821+
"energy-calorie-it",
4822+
"energy-gray",
4823+
"energy-sievert",
4824+
"force-kilogram-force",
4825+
"length-chain",
4826+
"length-jo-jp",
4827+
"length-ken",
4828+
"length-ri-jp",
4829+
"length-rin",
4830+
"length-rod",
4831+
"length-shaku-cloth",
4832+
"length-shaku-length",
4833+
"length-sun",
4834+
"magnetic-tesla",
4835+
"magnetic-weber",
4836+
"mass-fun",
4837+
"mass-slug",
4838+
"pressure-ofhg",
4839+
"speed-light-speed",
4840+
"temperature-rankine",
4841+
"volume-cup-imperial",
4842+
"volume-cup-jp",
4843+
"volume-fluid-ounce-metric",
4844+
"volume-koku",
4845+
"volume-kosaji",
4846+
"volume-osaji",
4847+
"volume-pint-imperial",
4848+
"volume-sai",
4849+
"volume-shaku",
4850+
"volume-to-jp")
4851+
.putAll(GrammarStatus.always, "meter")
4852+
.build();
4853+
4854+
Set<String> unitsToAdd = GrammarInfo.getUnitsToAddGrammar();
4855+
4856+
assertEquals(
4857+
"Should be missing",
4858+
Set.of(),
4859+
Sets.intersection(
4860+
new TreeSet<String>(statusToLongUnit.get(GrammarStatus.never)),
4861+
unitsToAdd));
4862+
4863+
assertEquals(
4864+
"Should be present always",
4865+
Set.of(),
4866+
Sets.intersection(
4867+
new TreeSet<String>(statusToLongUnit.get(GrammarStatus.always)),
4868+
unitsToAdd));
4869+
}
47614870
}

0 commit comments

Comments
 (0)