Skip to content

Commit 5f41720

Browse files
authored
Refactor MeasureUnitParser and update related components (#6328)
1 parent f128c80 commit 5f41720

33 files changed

+323
-358
lines changed

components/experimental/src/measure/parser.rs

+53-14
Original file line numberDiff line numberDiff line change
@@ -3,38 +3,79 @@
33
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
44

55
use smallvec::SmallVec;
6-
use zerotrie::ZeroTrieSimpleAscii;
76

87
use crate::measure::measureunit::MeasureUnit;
98
use crate::measure::power::get_power;
109
use crate::measure::si_prefix::get_si_prefix;
1110
use crate::units::InvalidUnitError;
1211

12+
use icu_provider::prelude::*;
13+
use icu_provider::DataError;
14+
1315
use super::provider::si_prefix::{Base, SiPrefix};
1416
use super::provider::single_unit::SingleUnit;
1517

1618
// TODO: add test cases for this parser after adding UnitsTest.txt to the test data.
1719
/// A parser for the CLDR unit identifier (e.g. `meter-per-square-second`)
18-
pub struct MeasureUnitParser<'data> {
20+
pub struct MeasureUnitParser {
1921
/// Contains the trie for the unit identifiers.
20-
units_trie: &'data ZeroTrieSimpleAscii<[u8]>,
22+
payload: DataPayload<super::provider::trie::UnitsTrieV1>,
23+
}
24+
25+
#[cfg(feature = "compiled_data")]
26+
impl Default for MeasureUnitParser {
27+
/// Creates a new [`MeasureUnitParser`] from compiled data.
28+
///
29+
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
30+
///
31+
/// [📚 Help choosing a constructor](icu_provider::constructors)
32+
#[cfg(feature = "compiled_data")]
33+
fn default() -> Self {
34+
Self {
35+
payload: DataPayload::from_static_ref(crate::provider::Baked::SINGLETON_UNITS_TRIE_V1),
36+
}
37+
}
2138
}
2239

23-
impl<'data> MeasureUnitParser<'data> {
24-
// TODO: revisit the public nature of the API. Maybe we should make it private and add a function to create it from a ConverterFactory.
25-
/// Creates a new MeasureUnitParser from a ZeroTrie payload.
26-
pub fn from_payload(payload: &'data ZeroTrieSimpleAscii<[u8]>) -> Self {
40+
impl MeasureUnitParser {
41+
/// Creates a new [`MeasureUnitParser`] from compiled data.
42+
///
43+
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
44+
///
45+
/// [📚 Help choosing a constructor](icu_provider::constructors)
46+
#[cfg(feature = "compiled_data")]
47+
pub const fn new() -> Self {
2748
Self {
28-
units_trie: payload,
49+
payload: DataPayload::from_static_ref(crate::provider::Baked::SINGLETON_UNITS_TRIE_V1),
2950
}
3051
}
3152

53+
#[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
54+
pub fn try_new_unstable<D>(provider: &D) -> Result<Self, DataError>
55+
where
56+
D: ?Sized + DataProvider<super::provider::trie::UnitsTrieV1>,
57+
{
58+
let payload = provider.load(DataRequest::default())?.payload;
59+
60+
Ok(Self { payload })
61+
}
62+
63+
icu_provider::gen_buffer_data_constructors!(
64+
() -> error: DataError,
65+
functions: [
66+
new: skip,
67+
try_new_with_buffer_provider,
68+
try_new_unstable,
69+
Self,
70+
]
71+
);
72+
3273
/// Get the unit id.
3374
/// NOTE:
3475
/// if the unit id is found, the function will return (unit id, part without the unit id and without `-` at the beginning of the remaining part if it exists).
3576
/// if the unit id is not found, the function will return an error.
3677
fn get_unit_id<'a>(&self, part: &'a [u8]) -> Result<(u16, &'a [u8]), InvalidUnitError> {
37-
let mut cursor = self.units_trie.cursor();
78+
let mut cursor = self.payload.get().trie.cursor();
3879
let mut longest_match = Err(InvalidUnitError);
3980

4081
for (i, byte) in part.iter().enumerate() {
@@ -190,7 +231,7 @@ impl<'data> MeasureUnitParser<'data> {
190231

191232
#[cfg(test)]
192233
mod tests {
193-
use crate::units::converter_factory::ConverterFactory;
234+
use crate::measure::parser::MeasureUnitParser;
194235

195236
#[test]
196237
fn test_parser_cases() {
@@ -200,10 +241,9 @@ mod tests {
200241
("portion-per-1000000000", 1, 1_000_000_000),
201242
("liter-per-100-kilometer", 2, 100),
202243
];
244+
let parser = MeasureUnitParser::default();
203245

204246
for (input, expected_len, expected_denominator) in test_cases {
205-
let converter_factory = ConverterFactory::new();
206-
let parser = converter_factory.parser();
207247
let measure_unit = parser.try_from_str(input).unwrap();
208248
assert_eq!(measure_unit.single_units.len(), expected_len);
209249
assert_eq!(measure_unit.constant_denominator, expected_denominator);
@@ -268,8 +308,7 @@ mod tests {
268308
continue;
269309
}
270310

271-
let converter_factory = ConverterFactory::new();
272-
let parser = converter_factory.parser();
311+
let parser = MeasureUnitParser::default();
273312
let measure_unit = parser.try_from_str(input);
274313
if measure_unit.is_ok() {
275314
println!("OK: {}", input);

components/experimental/src/measure/provider/trie.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,8 @@ icu_provider::data_marker!(
4242
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::measure::provider::trie))]
4343
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
4444
pub struct UnitsTrie<'data> {
45-
// TODO: remove this field from units/provider::UnitsInfo once the `MeasureUnit` is fully used in the measurement units.
46-
/// Maps from unit name (e.g. foot or meter) to its unit id. this id can be used to retrieve the conversion information from the `UnitsInfo`.
45+
/// Maps from a unit name (e.g., "foot" or "meter") to its corresponding unit ID.
46+
/// This ID represents the index of this unit in the `UnitsInfo` struct and can be used to retrieve the conversion information.
4747
#[cfg_attr(feature = "serde", serde(borrow))]
4848
pub trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,
4949
}

components/experimental/src/units/converter_factory.rs

+14-18
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
44

55
use crate::measure::measureunit::MeasureUnit;
6-
use crate::measure::parser::MeasureUnitParser;
76
use crate::measure::provider::single_unit::SingleUnit;
87
use crate::units::provider;
98
use crate::units::ratio::IcuRatio;
@@ -71,10 +70,6 @@ impl ConverterFactory {
7170
Ok(Self { payload })
7271
}
7372

74-
pub fn parser(&self) -> MeasureUnitParser<'_> {
75-
MeasureUnitParser::from_payload(self.payload.get().units_conversion_trie.as_borrowed())
76-
}
77-
7873
/// Calculates the offset between two units by performing the following steps:
7974
/// 1. Identify the conversion rate from the first unit to the base unit as ConversionRate1: N1/D1 with an Offset1: OffsetN1/OffsetD1.
8075
/// 2. Identify the conversion rate from the second unit to the base unit as ConversionRate2: N2/D2 with an Offset2: OffsetN2/OffsetD2.
@@ -116,7 +111,7 @@ impl ConverterFactory {
116111
let input_conversion_info = self
117112
.payload
118113
.get()
119-
.convert_infos
114+
.conversion_info
120115
.get(input_unit.single_units[0].unit_id as usize);
121116
debug_assert!(
122117
input_conversion_info.is_some(),
@@ -127,7 +122,7 @@ impl ConverterFactory {
127122
let output_conversion_info = self
128123
.payload
129124
.get()
130-
.convert_infos
125+
.conversion_info
131126
.get(output_unit.single_units[0].unit_id as usize);
132127
debug_assert!(
133128
output_conversion_info.is_some(),
@@ -181,7 +176,7 @@ impl ConverterFactory {
181176
let items_from_item = factory
182177
.payload
183178
.get()
184-
.convert_infos
179+
.conversion_info
185180
.get(item.unit_id as usize);
186181

187182
debug_assert!(items_from_item.is_some(), "Failed to get convert info");
@@ -250,7 +245,7 @@ impl ConverterFactory {
250245
let conversion_info = self
251246
.payload
252247
.get()
253-
.convert_infos
248+
.conversion_info
254249
.get(unit_item.unit_id as usize);
255250
debug_assert!(conversion_info.is_some(), "Failed to get conversion info");
256251
let conversion_info = conversion_info?;
@@ -339,12 +334,14 @@ impl ConverterFactory {
339334
#[cfg(test)]
340335
mod tests {
341336
use super::ConverterFactory;
337+
use crate::measure::parser::MeasureUnitParser;
342338

343339
#[test]
344340
fn test_converter_factory() {
345341
let factory = ConverterFactory::new();
346-
let input_unit = factory.parser().try_from_str("meter").unwrap();
347-
let output_unit = factory.parser().try_from_str("foot").unwrap();
342+
let parser = MeasureUnitParser::default();
343+
let input_unit = parser.try_from_str("meter").unwrap();
344+
let output_unit = parser.try_from_str("foot").unwrap();
348345
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
349346
let result = converter.convert(&1000.0);
350347
assert!(
@@ -357,11 +354,9 @@ mod tests {
357354
#[test]
358355
fn test_converter_factory_with_constant_denominator() {
359356
let factory = ConverterFactory::new();
360-
let input_unit = factory
361-
.parser()
362-
.try_from_str("liter-per-100-kilometer")
363-
.unwrap();
364-
let output_unit = factory.parser().try_from_str("mile-per-gallon").unwrap();
357+
let parser = MeasureUnitParser::default();
358+
let input_unit = parser.try_from_str("liter-per-100-kilometer").unwrap();
359+
let output_unit = parser.try_from_str("mile-per-gallon").unwrap();
365360
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
366361
let result = converter.convert(&1.0);
367362
assert!(
@@ -374,8 +369,9 @@ mod tests {
374369
#[test]
375370
fn test_converter_factory_with_offset() {
376371
let factory = ConverterFactory::new();
377-
let input_unit = factory.parser().try_from_str("celsius").unwrap();
378-
let output_unit = factory.parser().try_from_str("fahrenheit").unwrap();
372+
let parser = MeasureUnitParser::default();
373+
let input_unit = parser.try_from_str("celsius").unwrap();
374+
let output_unit = parser.try_from_str("fahrenheit").unwrap();
379375
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
380376
let result = converter.convert(&0.0);
381377
assert!(

components/experimental/src/units/provider.rs

+1-7
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
1212
use icu_provider::prelude::*;
1313
use num_bigint::BigInt;
14-
use zerotrie::ZeroTrieSimpleAscii;
1514
use zerovec::{ule::AsULE, VarZeroVec, ZeroVec};
1615

1716
use crate::measure::provider::single_unit::SingleUnit;
@@ -41,15 +40,10 @@ icu_provider::data_marker!(UnitsInfoV1, UnitsInfo<'static>, is_singleton = true)
4140
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::units::provider))]
4241
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
4342
pub struct UnitsInfo<'data> {
44-
// TODO: remove this field once we are using this map from `measure/provider::UnitsTrie`.
45-
/// Maps from unit name (e.g. foot) to it is conversion information.
46-
#[cfg_attr(feature = "serde", serde(borrow))]
47-
pub units_conversion_trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,
48-
4943
/// Contains the conversion information, such as the conversion rate and the base unit.
5044
/// For example, the conversion information for the unit `foot` is `1 foot = 0.3048 meter`.
5145
#[cfg_attr(feature = "serde", serde(borrow))]
52-
pub convert_infos: VarZeroVec<'data, ConversionInfoULE>,
46+
pub conversion_info: VarZeroVec<'data, ConversionInfoULE>,
5347
}
5448

5549
icu_provider::data_struct!(UnitsInfo<'_>, #[cfg(feature = "datagen")]);

components/experimental/tests/units/units_test.rs

+4-4
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
use core::str::FromStr;
66

7+
use icu_experimental::measure::parser::MeasureUnitParser;
78
use icu_experimental::units::converter::UnitsConverter;
89
use icu_experimental::units::converter_factory::ConverterFactory;
910
use icu_experimental::units::ratio::IcuRatio;
@@ -38,7 +39,7 @@ fn test_cldr_unit_tests() {
3839
.collect();
3940

4041
let converter_factory = ConverterFactory::new();
41-
let parser = converter_factory.parser();
42+
let parser = MeasureUnitParser::default();
4243

4344
for test in tests {
4445
let input_unit = parser
@@ -207,7 +208,7 @@ fn test_units_non_convertible() {
207208
];
208209

209210
let converter_factory = ConverterFactory::new();
210-
let parser = converter_factory.parser();
211+
let parser = MeasureUnitParser::default();
211212

212213
for (input, output) in non_convertible_units.iter() {
213214
let input_unit = parser
@@ -285,8 +286,7 @@ fn test_unparsable_units() {
285286
"meter second",
286287
];
287288

288-
let converter_factory = ConverterFactory::new();
289-
let parser = converter_factory.parser();
289+
let parser = MeasureUnitParser::default();
290290

291291
unparsable_units.iter().for_each(|unit| {
292292
assert!(

ffi/capi/bindings/c/MeasureUnitParser.h

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/c/UnitsConverterFactory.h

-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/cpp/icu4x/MeasureUnit.d.hpp

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ffi/capi/bindings/cpp/icu4x/MeasureUnitParser.d.hpp

+19-4
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)