Skip to content

Refactor MeasureUnitParser and update related components #6328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Mar 25, 2025
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 44 additions & 14 deletions components/experimental/src/measure/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,38 +3,70 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use smallvec::SmallVec;
use zerotrie::ZeroTrieSimpleAscii;

use crate::measure::measureunit::MeasureUnit;
use crate::measure::power::get_power;
use crate::measure::si_prefix::get_si_prefix;
use crate::units::InvalidUnitError;

use icu_provider::prelude::*;
use icu_provider::DataError;

use super::provider::si_prefix::{Base, SiPrefix};
use super::provider::single_unit::SingleUnit;

// TODO: add test cases for this parser after adding UnitsTest.txt to the test data.
/// A parser for the CLDR unit identifier (e.g. `meter-per-square-second`)
pub struct MeasureUnitParser<'data> {
pub struct MeasureUnitParser {
/// Contains the trie for the unit identifiers.
units_trie: &'data ZeroTrieSimpleAscii<[u8]>,
payload: DataPayload<super::provider::trie::UnitsTrieV1>,
}

impl<'data> MeasureUnitParser<'data> {
// TODO: revisit the public nature of the API. Maybe we should make it private and add a function to create it from a ConverterFactory.
/// Creates a new MeasureUnitParser from a ZeroTrie payload.
pub fn from_payload(payload: &'data ZeroTrieSimpleAscii<[u8]>) -> Self {
impl MeasureUnitParser {
icu_provider::gen_buffer_data_constructors!(
() -> error: DataError,
functions: [
new: skip,
try_new_with_buffer_provider,
try_new_unstable,
Self,
]
);

/// Creates a new [`MeasureUnitParser`] from compiled data.
///
/// ✨ *Enabled with the `compiled_data` Cargo feature.*
///
/// [📚 Help choosing a constructor](icu_provider::constructors)
#[cfg(feature = "compiled_data")]
pub const fn new() -> Self {
Self {
units_trie: payload,
payload: DataPayload::from_static_ref(crate::provider::Baked::SINGLETON_UNITS_TRIE_V1),
}
}

#[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
pub fn try_new_unstable<D>(provider: &D) -> Result<Self, DataError>
where
D: ?Sized + DataProvider<super::provider::trie::UnitsTrieV1>,
{
let payload = provider.load(DataRequest::default())?.payload;

Ok(Self { payload })
}

// TODO: remove this function once the `try_new_with_buffer_provider` constructor can be used in `provider/source/src/units/info.rs`.
/// Creates a new [`MeasureUnitParser`] from a [`DataPayload`].
pub fn from_payload(payload: DataPayload<super::provider::trie::UnitsTrieV1>) -> Self {
Self { payload }
}

/// Gets the unit id.
/// NOTE:
/// If the unit id is found, the function returns `(unit id, remaining part)`, where the remaining part is the input without the unit id and without the leading `-`, if one exists.
/// If the unit id is not found, the function returns an error.
fn get_unit_id<'a>(&self, part: &'a [u8]) -> Result<(u16, &'a [u8]), InvalidUnitError> {
let mut cursor = self.units_trie.cursor();
let mut cursor = self.payload.get().trie.cursor();
let mut longest_match = Err(InvalidUnitError);

for (i, byte) in part.iter().enumerate() {
Expand Down Expand Up @@ -190,7 +222,7 @@ impl<'data> MeasureUnitParser<'data> {

#[cfg(test)]
mod tests {
use crate::units::converter_factory::ConverterFactory;
use crate::measure::parser::MeasureUnitParser;

#[test]
fn test_parser_cases() {
Expand All @@ -200,10 +232,9 @@ mod tests {
("portion-per-1000000000", 1, 1_000_000_000),
("liter-per-100-kilometer", 2, 100),
];
let parser = MeasureUnitParser::new();

for (input, expected_len, expected_denominator) in test_cases {
let converter_factory = ConverterFactory::new();
let parser = converter_factory.parser();
let measure_unit = parser.try_from_str(input).unwrap();
assert_eq!(measure_unit.single_units.len(), expected_len);
assert_eq!(measure_unit.constant_denominator, expected_denominator);
Expand Down Expand Up @@ -268,8 +299,7 @@ mod tests {
continue;
}

let converter_factory = ConverterFactory::new();
let parser = converter_factory.parser();
let parser = MeasureUnitParser::new();
let measure_unit = parser.try_from_str(input);
if measure_unit.is_ok() {
println!("OK: {}", input);
Expand Down
4 changes: 2 additions & 2 deletions components/experimental/src/measure/provider/trie.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ icu_provider::data_marker!(
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::measure::provider::trie))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct UnitsTrie<'data> {
// TODO: remove this field from units/provider::UnitsInfo once the `MeasureUnit` is fully used in the measurement units.
/// Maps from unit name (e.g. foot or meter) to its unit id. this id can be used to retrieve the conversion information from the `UnitsInfo`.
/// Maps from a unit name (e.g., "foot" or "meter") to its corresponding unit ID.
/// This ID represents the index of this unit in the `UnitsInfo` struct and can be used to retrieve the conversion information.
#[cfg_attr(feature = "serde", serde(borrow))]
pub trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,
}
Expand Down
32 changes: 14 additions & 18 deletions components/experimental/src/units/converter_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use crate::measure::measureunit::MeasureUnit;
use crate::measure::parser::MeasureUnitParser;
use crate::measure::provider::single_unit::SingleUnit;
use crate::units::provider;
use crate::units::ratio::IcuRatio;
Expand Down Expand Up @@ -71,10 +70,6 @@ impl ConverterFactory {
Ok(Self { payload })
}

pub fn parser(&self) -> MeasureUnitParser<'_> {
MeasureUnitParser::from_payload(self.payload.get().units_conversion_trie.as_borrowed())
}

/// Calculates the offset between two units by performing the following steps:
/// 1. Identify the conversion rate from the first unit to the base unit as ConversionRate1: N1/D1 with an Offset1: OffsetN1/OffsetD1.
/// 2. Identify the conversion rate from the second unit to the base unit as ConversionRate2: N2/D2 with an Offset2: OffsetN2/OffsetD2.
Expand Down Expand Up @@ -116,7 +111,7 @@ impl ConverterFactory {
let input_conversion_info = self
.payload
.get()
.convert_infos
.conversion_info
.get(input_unit.single_units[0].unit_id as usize);
debug_assert!(
input_conversion_info.is_some(),
Expand All @@ -127,7 +122,7 @@ impl ConverterFactory {
let output_conversion_info = self
.payload
.get()
.convert_infos
.conversion_info
.get(output_unit.single_units[0].unit_id as usize);
debug_assert!(
output_conversion_info.is_some(),
Expand Down Expand Up @@ -181,7 +176,7 @@ impl ConverterFactory {
let items_from_item = factory
.payload
.get()
.convert_infos
.conversion_info
.get(item.unit_id as usize);

debug_assert!(items_from_item.is_some(), "Failed to get convert info");
Expand Down Expand Up @@ -250,7 +245,7 @@ impl ConverterFactory {
let conversion_info = self
.payload
.get()
.convert_infos
.conversion_info
.get(unit_item.unit_id as usize);
debug_assert!(conversion_info.is_some(), "Failed to get conversion info");
let conversion_info = conversion_info?;
Expand Down Expand Up @@ -339,12 +334,14 @@ impl ConverterFactory {
#[cfg(test)]
mod tests {
use super::ConverterFactory;
use crate::measure::parser::MeasureUnitParser;

#[test]
fn test_converter_factory() {
let factory = ConverterFactory::new();
let input_unit = factory.parser().try_from_str("meter").unwrap();
let output_unit = factory.parser().try_from_str("foot").unwrap();
let parser = MeasureUnitParser::new();
let input_unit = parser.try_from_str("meter").unwrap();
let output_unit = parser.try_from_str("foot").unwrap();
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
let result = converter.convert(&1000.0);
assert!(
Expand All @@ -357,11 +354,9 @@ mod tests {
#[test]
fn test_converter_factory_with_constant_denominator() {
let factory = ConverterFactory::new();
let input_unit = factory
.parser()
.try_from_str("liter-per-100-kilometer")
.unwrap();
let output_unit = factory.parser().try_from_str("mile-per-gallon").unwrap();
let parser = MeasureUnitParser::new();
let input_unit = parser.try_from_str("liter-per-100-kilometer").unwrap();
let output_unit = parser.try_from_str("mile-per-gallon").unwrap();
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
let result = converter.convert(&1.0);
assert!(
Expand All @@ -374,8 +369,9 @@ mod tests {
#[test]
fn test_converter_factory_with_offset() {
let factory = ConverterFactory::new();
let input_unit = factory.parser().try_from_str("celsius").unwrap();
let output_unit = factory.parser().try_from_str("fahrenheit").unwrap();
let parser = MeasureUnitParser::new();
let input_unit = parser.try_from_str("celsius").unwrap();
let output_unit = parser.try_from_str("fahrenheit").unwrap();
let converter = factory.converter::<f64>(&input_unit, &output_unit).unwrap();
let result = converter.convert(&0.0);
assert!(
Expand Down
8 changes: 1 addition & 7 deletions components/experimental/src/units/provider.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

use icu_provider::prelude::*;
use num_bigint::BigInt;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::{ule::AsULE, VarZeroVec, ZeroVec};

use crate::measure::provider::single_unit::SingleUnit;
Expand Down Expand Up @@ -41,15 +40,10 @@ icu_provider::data_marker!(UnitsInfoV1, UnitsInfo<'static>, is_singleton = true)
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::units::provider))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct UnitsInfo<'data> {
// TODO: remove this field once we are using this map from `measure/provider::UnitsTrie`.
/// Maps from unit name (e.g. foot) to it is conversion information.
#[cfg_attr(feature = "serde", serde(borrow))]
pub units_conversion_trie: ZeroTrieSimpleAscii<ZeroVec<'data, u8>>,

/// Contains the conversion information, such as the conversion rate and the base unit.
/// For example, the conversion information for the unit `foot` is `1 foot = 0.3048 meter`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub convert_infos: VarZeroVec<'data, ConversionInfoULE>,
pub conversion_info: VarZeroVec<'data, ConversionInfoULE>,
}

icu_provider::data_struct!(UnitsInfo<'_>, #[cfg(feature = "datagen")]);
Expand Down
8 changes: 4 additions & 4 deletions components/experimental/tests/units/units_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

use core::str::FromStr;

use icu_experimental::measure::parser::MeasureUnitParser;
use icu_experimental::units::converter::UnitsConverter;
use icu_experimental::units::converter_factory::ConverterFactory;
use icu_experimental::units::ratio::IcuRatio;
Expand Down Expand Up @@ -38,7 +39,7 @@ fn test_cldr_unit_tests() {
.collect();

let converter_factory = ConverterFactory::new();
let parser = converter_factory.parser();
let parser = MeasureUnitParser::new();

for test in tests {
let input_unit = parser
Expand Down Expand Up @@ -207,7 +208,7 @@ fn test_units_non_convertible() {
];

let converter_factory = ConverterFactory::new();
let parser = converter_factory.parser();
let parser = MeasureUnitParser::new();

for (input, output) in non_convertible_units.iter() {
let input_unit = parser
Expand Down Expand Up @@ -285,8 +286,7 @@ fn test_unparsable_units() {
"meter second",
];

let converter_factory = ConverterFactory::new();
let parser = converter_factory.parser();
let parser = MeasureUnitParser::new();

unparsable_units.iter().for_each(|unit| {
assert!(
Expand Down
7 changes: 7 additions & 0 deletions ffi/capi/bindings/c/MeasureUnitParser.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 0 additions & 3 deletions ffi/capi/bindings/c/UnitsConverterFactory.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion ffi/capi/bindings/cpp/icu4x/MeasureUnit.d.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 19 additions & 4 deletions ffi/capi/bindings/cpp/icu4x/MeasureUnitParser.d.hpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading