Skip to content

Commit 4146498

Browse files
authored
Add Indic_Conjunct_Break property. (#6379)
From #6367, @eggrobin suggests using the Indic_Conjunct_Break (InCB) property for Grapheme Cluster Break. Note that `InCB.toml` is still incomplete, as shown below, because the property was added in ICU 76 as a draft API. ``` values = [ {discr = 0, long = "None", short = "None"}, ] ``` This means that the names (short / long / parse) are empty in this implementation.
1 parent 8bec1f9 commit 4146498

File tree

16 files changed

+6378
-3
lines changed

16 files changed

+6378
-3
lines changed

components/properties/src/props.rs

+54
Original file line numberDiff line numberDiff line change
@@ -1425,6 +1425,60 @@ make_enumerated_property! {
14251425
ule_ty: u8;
14261426
}
14271427

1428+
/// Property Indic_Conjunct_Break.
1429+
/// See UAX #44:
1430+
/// <https://www.unicode.org/reports/tr44/#Indic_Conjunct_Break>.
1431+
///
1432+
/// # Example
1433+
///
1434+
/// ```
1435+
/// use icu::properties::{CodePointMapData, props::IndicConjunctBreak};
1436+
///
1437+
/// assert_eq!(CodePointMapData::<IndicConjunctBreak>::new().get('a'), IndicConjunctBreak::None);
1438+
/// assert_eq!(CodePointMapData::<IndicConjunctBreak>::new().get('\u{094d}'), IndicConjunctBreak::Linker);
1439+
/// assert_eq!(CodePointMapData::<IndicConjunctBreak>::new().get('\u{0915}'), IndicConjunctBreak::Consonant);
1440+
/// assert_eq!(CodePointMapData::<IndicConjunctBreak>::new().get('\u{0300}'), IndicConjunctBreak::Extend);
1441+
/// ```
1442+
#[doc(hidden)] // draft API in ICU4C
1443+
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
1444+
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
1445+
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
1446+
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
1447+
#[allow(clippy::exhaustive_structs)] // newtype
1448+
#[repr(transparent)]
1449+
// The wrapped `u8` is crate-private; code outside this crate converts through
// `to_icu4c_value` / `from_icu4c_value` instead of touching the field directly.
pub struct IndicConjunctBreak(pub(crate) u8);
1450+
1451+
// Raw-value conversions: both directions pass the wrapped `u8` through untouched.
impl IndicConjunctBreak {
1452+
/// Returns an ICU4C `UIndicConjunctBreak` value.
///
/// This is the wrapped `u8`, returned unchanged.
1453+
pub const fn to_icu4c_value(self) -> u8 {
1454+
self.0
1455+
}
1456+
/// Constructor from an ICU4C `UIndicConjunctBreak` value.
///
/// The value is stored as-is: no range check is performed, so any `u8` is
/// accepted, including values that match none of the named constants.
1457+
pub const fn from_icu4c_value(value: u8) -> Self {
1458+
Self(value)
1459+
}
1460+
}
1461+
1462+
// Named constants for the four `IndicConjunctBreak` values declared here,
// with discriminants 0 (`None`) through 3 (`Linker`).
create_const_array! {
1463+
#[doc(hidden)] // draft API in ICU4C
1464+
#[allow(non_upper_case_globals)]
1465+
impl IndicConjunctBreak {
1466+
pub const None: IndicConjunctBreak = IndicConjunctBreak(0);
1467+
pub const Consonant: IndicConjunctBreak = IndicConjunctBreak(1);
1468+
pub const Extend: IndicConjunctBreak = IndicConjunctBreak(2);
1469+
pub const Linker: IndicConjunctBreak = IndicConjunctBreak(3);
1470+
}
1471+
}
1472+
1473+
make_enumerated_property! {
1474+
name: "Indic_Conjunct_Break";
1475+
short_name: "InCB";
1476+
ident: IndicConjunctBreak;
1477+
data_marker: crate::provider::PropertyEnumIndicConjunctBreakV1;
1478+
singleton: SINGLETON_PROPERTY_ENUM_INDIC_CONJUNCT_BREAK_V1;
1479+
ule_ty: u8;
1480+
}
1481+
14281482
/// Property Indic_Syllabic_Category.
14291483
/// See UAX #44:
14301484
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.

components/properties/src/provider.rs

+8
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ const _: () = {
143143
impl_property_enum_general_category_v1!(Baked);
144144
impl_property_enum_grapheme_cluster_break_v1!(Baked);
145145
impl_property_enum_hangul_syllable_type_v1!(Baked);
146+
impl_property_enum_indic_conjunct_break_v1!(Baked);
146147
impl_property_enum_indic_syllabic_category_v1!(Baked);
147148
impl_property_enum_joining_type_v1!(Baked);
148149
impl_property_enum_line_break_v1!(Baked);
@@ -619,6 +620,12 @@ icu_provider::data_marker!(
619620
PropertyCodePointMap<'static, crate::props::HangulSyllableType>,
620621
is_singleton = true,
621622
);
623+
icu_provider::data_marker!(
624+
/// Data marker for the 'IndicConjunctBreak' Unicode property
625+
PropertyEnumIndicConjunctBreakV1,
626+
PropertyCodePointMap<'static, crate::props::IndicConjunctBreak>,
627+
is_singleton = true,
628+
);
622629
icu_provider::data_marker!(
623630
/// Data marker for the 'IndicSyllabicCategory' Unicode property
624631
PropertyEnumIndicSyllabicCategoryV1,
@@ -793,6 +800,7 @@ pub const MARKERS: &[DataMarkerInfo] = &[
793800
PropertyEnumGeneralCategoryV1::INFO,
794801
PropertyEnumGraphemeClusterBreakV1::INFO,
795802
PropertyEnumHangulSyllableTypeV1::INFO,
803+
PropertyEnumIndicConjunctBreakV1::INFO,
796804
PropertyEnumIndicSyllabicCategoryV1::INFO,
797805
PropertyEnumJoiningTypeV1::INFO,
798806
PropertyEnumLineBreakV1::INFO,

components/properties/src/runtime.rs

+1
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@ enum EnumeratedProperty {
138138
GeneralCategory = 0x1005,
139139
GraphemeClusterBreak = 0x1012,
140140
HangulSyllableType = 0x100B,
141+
IndicConjunctBreak = 0x101A,
141142
IndicPositionalCategory = 0x1016,
142143
IndicSyllabicCategory = 0x1017,
143144
JoiningGroup = 0x1006,

components/properties/src/trievalue.rs

+14-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@
55
use crate::bidi::BidiMirroringGlyph;
66
use crate::props::{
77
BidiClass, CanonicalCombiningClass, EastAsianWidth, GeneralCategory, GeneralCategoryGroup,
8-
GraphemeClusterBreak, HangulSyllableType, IndicSyllabicCategory, JoiningType, LineBreak,
9-
Script, SentenceBreak, VerticalOrientation, WordBreak,
8+
GraphemeClusterBreak, HangulSyllableType, IndicConjunctBreak, IndicSyllabicCategory,
9+
JoiningType, LineBreak, Script, SentenceBreak, VerticalOrientation, WordBreak,
1010
};
1111
use crate::script::ScriptWithExt;
1212
use core::convert::TryInto;
@@ -151,6 +151,18 @@ impl TrieValue for SentenceBreak {
151151
}
152152
}
153153

154+
// Conversions between `IndicConjunctBreak` and the `u32` representation
// required by the `TrieValue` trait, going through the wrapped `u8`.
impl TrieValue for IndicConjunctBreak {
155+
type TryFromU32Error = TryFromIntError;
156+
157+
// Narrows `i` to `u8` and wraps it; returns `TryFromIntError` when `i` does
// not fit in a `u8`. Values that fit are accepted without further checks.
fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
158+
u8::try_from(i).map(Self)
159+
}
160+
161+
// Widens the wrapped `u8` to `u32`; this direction cannot fail.
fn to_u32(self) -> u32 {
162+
u32::from(self.0)
163+
}
164+
}
165+
154166
impl TrieValue for IndicSyllabicCategory {
155167
type TryFromU32Error = TryFromIntError;
156168

provider/data/properties/data/mod.rs

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/data/property_enum_indic_conjunct_break_v1.rs.data

+75
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/fingerprints.csv

+1
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ property/enum/east/asian/width/v1, <singleton>, 5028B, 4971B, cbe33fdd284572e5
7171
property/enum/general/category/v1, <singleton>, 17572B, 17516B, fa8858c0d81ead14
7272
property/enum/grapheme/cluster/break/v1, <singleton>, 7840B, 7783B, bd0d2d3da35038da
7373
property/enum/hangul/syllable/type/v1, <singleton>, 888B, 829B, 67c6d3aaab48ff19
74+
property/enum/indic/conjunct/break/v1, <singleton>, 6724B, 6667B, 855285c232bd1220
7475
property/enum/indic/syllabic/category/v1, <singleton>, 6248B, 6189B, c10d1f8e12ae80ea
7576
property/enum/joining/type/v1, <singleton>, 7072B, 7015B, dff64b1d9027a0e6
7677
property/enum/line/break/v1, <singleton>, 15284B, 15228B, ec2f73fe17e5fb1c

provider/data/properties/stubdata/mod.rs

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/data/properties/stubdata/property_enum_indic_conjunct_break_v1.rs.data

+75
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

provider/registry/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,7 @@ macro_rules! registry(
185185
icu::properties::provider::PropertyEnumGeneralCategoryV1: PropertyEnumGeneralCategoryV1,
186186
icu::properties::provider::PropertyEnumGraphemeClusterBreakV1: PropertyEnumGraphemeClusterBreakV1,
187187
icu::properties::provider::PropertyEnumHangulSyllableTypeV1: PropertyEnumHangulSyllableTypeV1,
188+
icu::properties::provider::PropertyEnumIndicConjunctBreakV1: PropertyEnumIndicConjunctBreakV1,
188189
icu::properties::provider::PropertyEnumIndicSyllabicCategoryV1: PropertyEnumIndicSyllabicCategoryV1,
189190
icu::properties::provider::PropertyEnumJoiningTypeV1: PropertyEnumJoiningTypeV1,
190191
icu::properties::provider::PropertyEnumLineBreakV1: PropertyEnumLineBreakV1,

0 commit comments

Comments
 (0)