Skip to content

upgrade icu dependency from 74.2 -> 77.1 #14386

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "723c3a253ba1470031da676c93461194e2f1f491",
"property:icuConfig": "com.ibm.icu:icu4j:74.2"
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "8e3d5182fcd87cc981977746571a5cde39b6abdc",
"property:icuConfig": "com.ibm.icu:icu4j:77.1"
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@

/**
* This file contains unicode properties used by various {@link CharTokenizer}s. The data was
* generated using ICU4J v74.2.0.0, unicode version: 15.1.0.0.
* generated using ICU4J v77.1.0.0, unicode version: 16.0.0.0.
*/
public final class UnicodeProps {
private UnicodeProps() {}

/** Unicode version that was used to generate this file: {@value} */
public static final String UNICODE_VERSION = "15.1.0.0";
public static final String UNICODE_VERSION = "16.0.0.0";

/** Bitset with Unicode WHITESPACE code points. */
public static final Bits WHITESPACE =
Expand Down
2 changes: 1 addition & 1 deletion lucene/analysis/icu/src/data/uax29/Default.rbbi
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ $ALetter = [\p{Word_Break = ALetter}];
$Single_Quote = [\p{Word_Break = Single_Quote}];
$Double_Quote = [\p{Word_Break = Double_Quote}];
$MidNumLet = [\p{Word_Break = MidNumLet}];
$MidLetter = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
$MidLetter = [\p{Word_Break = MidLetter}];
$MidNum = [\p{Word_Break = MidNum}];
$Numeric = [\p{Word_Break = Numeric}];
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];
Expand Down
1 change: 1 addition & 0 deletions lucene/analysis/icu/src/data/utr30/BasicFoldings.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
FE58>002D
FE63>002D
FF0D>002D
10D6E>002D
10EAD>002D

## Greek letterforms folding (done by kd)
Expand Down
23 changes: 20 additions & 3 deletions lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@
0D3B..0D3C>
0D4D>
0DCA>
0E3A>
0E47..0E4C>
0E4E>
0EBA>
Expand All @@ -108,9 +109,11 @@
109A..109B>
135D..135F>
1714..1715>
1734>
17C9..17D3>
17DD>
1939..193B>
1A60>
1A75..1A7C>
1A7F>
1AB0..1ABE>
Expand All @@ -119,6 +122,8 @@
1B44>
1B6B..1B73>
1BAA..1BAB>
1BE6>
1BF2..1BF3>
1C36..1C37>
1C78..1C7D>
1CD0..1CE8>
Expand Down Expand Up @@ -147,6 +152,8 @@ A6F0..A6F1>
A700..A721>
A788..A78A>
A7F8..A7F9>
A806>
A82C>
A8C4>
A8E0..A8F1>
A92B..A92E>
Expand All @@ -171,8 +178,12 @@ FFE3>
10780..10785>
10787..107B0>
107B2..107BA>
10A38..10A3A>
10A3F>
10AE5..10AE6>
10D22..10D27>
10D4E>
10D69..10D6D>
10EFD..10EFF>
10F46..10F50>
10F82..10F85>
Expand All @@ -185,10 +196,13 @@ FFE3>
111CA..111CC>
11235..11236>
112E9..112EA>
1133C>
1133B..1133C>
1134D>
11366..1136C>
11370..11374>
113CE..113D0>
113D2..113D3>
113E1..113E2>
11442>
11446>
114C2..114C3>
Expand All @@ -207,9 +221,13 @@ FFE3>
11D42>
11D44..11D45>
11D97>
11F41..11F42>
11F5A>
13447..13455>
1612F>
16AF0..16AF4>
16B30..16B36>
16D6B..16D6C>
16F8F..16F9F>
16FF0..16FF1>
1AFF0..1AFF3>
Expand All @@ -226,6 +244,7 @@ FFE3>
1E130..1E136>
1E2AE>
1E2EC..1E2EF>
1E5EE..1E5EF>
1E8D0..1E8D6>
1E944..1E946>
1E948..1E94A>
Expand Down Expand Up @@ -623,14 +642,12 @@ A7FF>004D
0829>
082A..082D>
0900>0901
1734>
1DC0..1DC3>
1DD0..1DE6>
20D0..20F0>
2DE0..2DFF>
A670..A672>
A802>
10A3F>
1D165..1D166>
1D242..1D244>

Expand Down
70 changes: 70 additions & 0 deletions lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
10D37>0037 # HANIFI ROHINGYA DIGIT SEVEN
10D38>0038 # HANIFI ROHINGYA DIGIT EIGHT
10D39>0039 # HANIFI ROHINGYA DIGIT NINE
10D40>0030 # GARAY DIGIT ZERO
10D41>0031 # GARAY DIGIT ONE
10D42>0032 # GARAY DIGIT TWO
10D43>0033 # GARAY DIGIT THREE
10D44>0034 # GARAY DIGIT FOUR
10D45>0035 # GARAY DIGIT FIVE
10D46>0036 # GARAY DIGIT SIX
10D47>0037 # GARAY DIGIT SEVEN
10D48>0038 # GARAY DIGIT EIGHT
10D49>0039 # GARAY DIGIT NINE
10E60>0031 # RUMI DIGIT ONE
10E61>0032 # RUMI DIGIT TWO
10E62>0033 # RUMI DIGIT THREE
Expand Down Expand Up @@ -560,6 +570,26 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
116C7>0037 # TAKRI DIGIT SEVEN
116C8>0038 # TAKRI DIGIT EIGHT
116C9>0039 # TAKRI DIGIT NINE
116D0>0030 # MYANMAR PAO DIGIT ZERO
116D1>0031 # MYANMAR PAO DIGIT ONE
116D2>0032 # MYANMAR PAO DIGIT TWO
116D3>0033 # MYANMAR PAO DIGIT THREE
116D4>0034 # MYANMAR PAO DIGIT FOUR
116D5>0035 # MYANMAR PAO DIGIT FIVE
116D6>0036 # MYANMAR PAO DIGIT SIX
116D7>0037 # MYANMAR PAO DIGIT SEVEN
116D8>0038 # MYANMAR PAO DIGIT EIGHT
116D9>0039 # MYANMAR PAO DIGIT NINE
116DA>0030 # MYANMAR EASTERN PWO KAREN DIGIT ZERO
116DB>0031 # MYANMAR EASTERN PWO KAREN DIGIT ONE
116DC>0032 # MYANMAR EASTERN PWO KAREN DIGIT TWO
116DD>0033 # MYANMAR EASTERN PWO KAREN DIGIT THREE
116DE>0034 # MYANMAR EASTERN PWO KAREN DIGIT FOUR
116DF>0035 # MYANMAR EASTERN PWO KAREN DIGIT FIVE
116E0>0036 # MYANMAR EASTERN PWO KAREN DIGIT SIX
116E1>0037 # MYANMAR EASTERN PWO KAREN DIGIT SEVEN
116E2>0038 # MYANMAR EASTERN PWO KAREN DIGIT EIGHT
116E3>0039 # MYANMAR EASTERN PWO KAREN DIGIT NINE
11730>0030 # AHOM DIGIT ZERO
11731>0031 # AHOM DIGIT ONE
11732>0032 # AHOM DIGIT TWO
Expand Down Expand Up @@ -590,6 +620,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
11957>0037 # DIVES AKURU DIGIT SEVEN
11958>0038 # DIVES AKURU DIGIT EIGHT
11959>0039 # DIVES AKURU DIGIT NINE
11BF0>0030 # SUNUWAR DIGIT ZERO
11BF1>0031 # SUNUWAR DIGIT ONE
11BF2>0032 # SUNUWAR DIGIT TWO
11BF3>0033 # SUNUWAR DIGIT THREE
11BF4>0034 # SUNUWAR DIGIT FOUR
11BF5>0035 # SUNUWAR DIGIT FIVE
11BF6>0036 # SUNUWAR DIGIT SIX
11BF7>0037 # SUNUWAR DIGIT SEVEN
11BF8>0038 # SUNUWAR DIGIT EIGHT
11BF9>0039 # SUNUWAR DIGIT NINE
11C50>0030 # BHAIKSUKI DIGIT ZERO
11C51>0031 # BHAIKSUKI DIGIT ONE
11C52>0032 # BHAIKSUKI DIGIT TWO
Expand Down Expand Up @@ -630,6 +670,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
11F57>0037 # KAWI DIGIT SEVEN
11F58>0038 # KAWI DIGIT EIGHT
11F59>0039 # KAWI DIGIT NINE
16130>0030 # GURUNG KHEMA DIGIT ZERO
16131>0031 # GURUNG KHEMA DIGIT ONE
16132>0032 # GURUNG KHEMA DIGIT TWO
16133>0033 # GURUNG KHEMA DIGIT THREE
16134>0034 # GURUNG KHEMA DIGIT FOUR
16135>0035 # GURUNG KHEMA DIGIT FIVE
16136>0036 # GURUNG KHEMA DIGIT SIX
16137>0037 # GURUNG KHEMA DIGIT SEVEN
16138>0038 # GURUNG KHEMA DIGIT EIGHT
16139>0039 # GURUNG KHEMA DIGIT NINE
16A60>0030 # MRO DIGIT ZERO
16A61>0031 # MRO DIGIT ONE
16A62>0032 # MRO DIGIT TWO
Expand Down Expand Up @@ -660,6 +710,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
16B59>0039 # PAHAWH HMONG DIGIT NINE
16D70>0030 # KIRAT RAI DIGIT ZERO
16D71>0031 # KIRAT RAI DIGIT ONE
16D72>0032 # KIRAT RAI DIGIT TWO
16D73>0033 # KIRAT RAI DIGIT THREE
16D74>0034 # KIRAT RAI DIGIT FOUR
16D75>0035 # KIRAT RAI DIGIT FIVE
16D76>0036 # KIRAT RAI DIGIT SIX
16D77>0037 # KIRAT RAI DIGIT SEVEN
16D78>0038 # KIRAT RAI DIGIT EIGHT
16D79>0039 # KIRAT RAI DIGIT NINE
1E140>0030 # NYIAKENG PUACHUE HMONG DIGIT ZERO
1E141>0031 # NYIAKENG PUACHUE HMONG DIGIT ONE
1E142>0032 # NYIAKENG PUACHUE HMONG DIGIT TWO
Expand Down Expand Up @@ -690,6 +750,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
1E4F7>0037 # NAG MUNDARI DIGIT SEVEN
1E4F8>0038 # NAG MUNDARI DIGIT EIGHT
1E4F9>0039 # NAG MUNDARI DIGIT NINE
1E5F1>0030 # OL ONAL DIGIT ZERO
1E5F2>0031 # OL ONAL DIGIT ONE
1E5F3>0032 # OL ONAL DIGIT TWO
1E5F4>0033 # OL ONAL DIGIT THREE
1E5F5>0034 # OL ONAL DIGIT FOUR
1E5F6>0035 # OL ONAL DIGIT FIVE
1E5F7>0036 # OL ONAL DIGIT SIX
1E5F8>0037 # OL ONAL DIGIT SEVEN
1E5F9>0038 # OL ONAL DIGIT EIGHT
1E5FA>0039 # OL ONAL DIGIT NINE
1E950>0030 # ADLAM DIGIT ZERO
1E951>0031 # ADLAM DIGIT ONE
1E952>0032 # ADLAM DIGIT TWO
Expand Down
29 changes: 27 additions & 2 deletions lucene/analysis/icu/src/data/utr30/nfc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#
# Complete data for Unicode NFC normalization.

* Unicode 15.1.0
* Unicode 16.0.0

# Canonical_Combining_Class (ccc) values
0300..0314:230
Expand Down Expand Up @@ -132,7 +132,7 @@
0825..0827:230
0829..082D:230
0859..085B:220
0898:230
0897..0898:230
0899..089B:220
089C..089F:230
08CA..08CE:230
Expand Down Expand Up @@ -319,6 +319,7 @@ FE2E..FE2F:230
10AE5:230
10AE6:220
10D24..10D27:230
10D69..10D6D:230
10EAB..10EAC:230
10EFD..10EFF:220
10F46..10F47:220
Expand Down Expand Up @@ -348,6 +349,7 @@ FE2E..FE2F:230
1134D:9
11366..1136C:230
11370..11374:230
113CE..113D0:9
11442:9
11446:7
1145E:230
Expand All @@ -372,6 +374,7 @@ FE2E..FE2F:230
11D44..11D45:9
11D97:9
11F41..11F42:9
1612F:9
16AF0..16AF4:1
16B30..16B36:230
16FF0..16FF1:6
Expand All @@ -397,6 +400,8 @@ FE2E..FE2F:230
1E4EC..1E4ED:232
1E4EE:220
1E4EF:230
1E5EE:230
1E5EF:220
1E8D0..1E8D6:220
1E944..1E949:230
1E94A:7
Expand Down Expand Up @@ -1895,19 +1900,39 @@ FB4B>05D5 05B9
FB4C>05D1 05BF
FB4D>05DB 05BF
FB4E>05E4 05BF
105C9>105D2 0307 # one-way: diacritic 0307
105E4>105DA 0307 # one-way: diacritic 0307
1109A>11099 110BA # one-way: diacritic 110BA
1109C>1109B 110BA # one-way: diacritic 110BA
110AB>110A5 110BA # one-way: diacritic 110BA
1112E=11131 11127
1112F=11132 11127
1134B=11347 1133E
1134C=11347 11357
11383=11382 113C9
11385=11384 113BB
1138E=1138B 113C2
11391=11390 113C9
113C5=113C2 113C2
113C7=113C2 113B8
113C8=113C2 113C9
114BB=114B9 114BA
114BC=114B9 114B0
114BE=114B9 114BD
115BA=115B8 115AF
115BB=115B9 115AF
11938=11935 11930
16121=1611E 1611E
16122=1611E 16129
16123=1611E 1611F
16124=16129 1611F
16125=1611E 16120
16126=16121 1611F
16127=16122 1611F
16128=16121 16120
16D68=16D67 16D67
16D69=16D63 16D67
16D6A=16D69 16D67
1D15E>1D157 1D165
1D15F>1D158 1D165
1D160>1D15F 1D16E
Expand Down
38 changes: 37 additions & 1 deletion lucene/analysis/icu/src/data/utr30/nfkc.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# to NFKC one-way mappings.
# Use this file as the second gennorm2 input file after nfc.txt.

* Unicode 15.1.0
* Unicode 16.0.0

00A0>0020
00A8>0020 0308
Expand Down Expand Up @@ -2478,6 +2478,42 @@ FFEE>25CB
107B8>01C2
107B9>1DF0A
107BA>1DF1E
1CCD6>0041
1CCD7>0042
1CCD8>0043
1CCD9>0044
1CCDA>0045
1CCDB>0046
1CCDC>0047
1CCDD>0048
1CCDE>0049
1CCDF>004A
1CCE0>004B
1CCE1>004C
1CCE2>004D
1CCE3>004E
1CCE4>004F
1CCE5>0050
1CCE6>0051
1CCE7>0052
1CCE8>0053
1CCE9>0054
1CCEA>0055
1CCEB>0056
1CCEC>0057
1CCED>0058
1CCEE>0059
1CCEF>005A
1CCF0>0030
1CCF1>0031
1CCF2>0032
1CCF3>0033
1CCF4>0034
1CCF5>0035
1CCF6>0036
1CCF7>0037
1CCF8>0038
1CCF9>0039
1D400>0041
1D401>0042
1D402>0043
Expand Down
Loading