Skip to content

Commit 1220597

Browse files
committed
upgrade icu dependency from 74.2 -> 77.1 (#14386)
Bump dependency. Regenerate locks. Update licenses. Regenerate everything.

ICUFoldingFilter: Remove special-cases from Diacritic Folding, as these now have `:Diacritic=Yes:`:
* U+1734 HANUNOO SIGN PAMUDPOD
* U+10A3F KHAROSHTHI VIRAMA

ICUTokenizer: Synchronize Default.rbbi to reflect upstream ICU-22941 word-break updates to `MidLetter`:
* U+003A COLON
* U+FE55 SMALL COLON
* U+FF1A FULLWIDTH COLON

"These tailorings were introduced in ICU 72, but feedback has been negative, and the UTC declined to adopt these changes."
1 parent bd85c24 commit 1220597

File tree

21 files changed

+246
-31
lines changed

21 files changed

+246
-31
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ Other
184184

185185
* GITHUB#14372: Reduce visibility of NRTSuggester#load from public to package private (Luca Cavanna)
186186

187+
* GITHUB#14386: Bump analysis/icu's icu4j dependency to 77.1 (Robert Muir)
188+
187189
======================= Lucene 10.1.0 =======================
188190

189191
API Changes
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
{
2-
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "723c3a253ba1470031da676c93461194e2f1f491",
3-
"property:icuConfig": "com.ibm.icu:icu4j:74.2"
2+
"lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java": "8e3d5182fcd87cc981977746571a5cde39b6abdc",
3+
"property:icuConfig": "com.ibm.icu:icu4j:77.1"
44
}

lucene/analysis/common/src/java/org/apache/lucene/analysis/util/UnicodeProps.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,13 @@
2424

2525
/**
2626
* This file contains unicode properties used by various {@link CharTokenizer}s. The data was
27-
* generated using ICU4J v74.2.0.0, unicode version: 15.1.0.0.
27+
* generated using ICU4J v77.1.0.0, unicode version: 16.0.0.0.
2828
*/
2929
public final class UnicodeProps {
3030
private UnicodeProps() {}
3131

3232
/** Unicode version that was used to generate this file: {@value} */
33-
public static final String UNICODE_VERSION = "15.1.0.0";
33+
public static final String UNICODE_VERSION = "16.0.0.0";
3434

3535
/** Bitset with Unicode WHITESPACE code points. */
3636
public static final Bits WHITESPACE =

lucene/analysis/icu/src/data/uax29/Default.rbbi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ $ALetter = [\p{Word_Break = ALetter}];
6666
$Single_Quote = [\p{Word_Break = Single_Quote}];
6767
$Double_Quote = [\p{Word_Break = Double_Quote}];
6868
$MidNumLet = [\p{Word_Break = MidNumLet}];
69-
$MidLetter = [\p{Word_Break = MidLetter} - [\: \uFE55 \uFF1A]];
69+
$MidLetter = [\p{Word_Break = MidLetter}];
7070
$MidNum = [\p{Word_Break = MidNum}];
7171
$Numeric = [\p{Word_Break = Numeric}];
7272
$ExtendNumLet = [\p{Word_Break = ExtendNumLet}];

lucene/analysis/icu/src/data/utr30/BasicFoldings.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
FE58>002D
5858
FE63>002D
5959
FF0D>002D
60+
10D6E>002D
6061
10EAD>002D
6162

6263
## Greek letterforms folding (done by kd)

lucene/analysis/icu/src/data/utr30/DiacriticFolding.txt

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@
8787
0D3B..0D3C>
8888
0D4D>
8989
0DCA>
90+
0E3A>
9091
0E47..0E4C>
9192
0E4E>
9293
0EBA>
@@ -108,9 +109,11 @@
108109
109A..109B>
109110
135D..135F>
110111
1714..1715>
112+
1734>
111113
17C9..17D3>
112114
17DD>
113115
1939..193B>
116+
1A60>
114117
1A75..1A7C>
115118
1A7F>
116119
1AB0..1ABE>
@@ -119,6 +122,8 @@
119122
1B44>
120123
1B6B..1B73>
121124
1BAA..1BAB>
125+
1BE6>
126+
1BF2..1BF3>
122127
1C36..1C37>
123128
1C78..1C7D>
124129
1CD0..1CE8>
@@ -147,6 +152,8 @@ A6F0..A6F1>
147152
A700..A721>
148153
A788..A78A>
149154
A7F8..A7F9>
155+
A806>
156+
A82C>
150157
A8C4>
151158
A8E0..A8F1>
152159
A92B..A92E>
@@ -171,8 +178,12 @@ FFE3>
171178
10780..10785>
172179
10787..107B0>
173180
107B2..107BA>
181+
10A38..10A3A>
182+
10A3F>
174183
10AE5..10AE6>
175184
10D22..10D27>
185+
10D4E>
186+
10D69..10D6D>
176187
10EFD..10EFF>
177188
10F46..10F50>
178189
10F82..10F85>
@@ -185,10 +196,13 @@ FFE3>
185196
111CA..111CC>
186197
11235..11236>
187198
112E9..112EA>
188-
1133C>
199+
1133B..1133C>
189200
1134D>
190201
11366..1136C>
191202
11370..11374>
203+
113CE..113D0>
204+
113D2..113D3>
205+
113E1..113E2>
192206
11442>
193207
11446>
194208
114C2..114C3>
@@ -207,9 +221,13 @@ FFE3>
207221
11D42>
208222
11D44..11D45>
209223
11D97>
224+
11F41..11F42>
225+
11F5A>
210226
13447..13455>
227+
1612F>
211228
16AF0..16AF4>
212229
16B30..16B36>
230+
16D6B..16D6C>
213231
16F8F..16F9F>
214232
16FF0..16FF1>
215233
1AFF0..1AFF3>
@@ -226,6 +244,7 @@ FFE3>
226244
1E130..1E136>
227245
1E2AE>
228246
1E2EC..1E2EF>
247+
1E5EE..1E5EF>
229248
1E8D0..1E8D6>
230249
1E944..1E946>
231250
1E948..1E94A>
@@ -623,14 +642,12 @@ A7FF>004D
623642
0829>
624643
082A..082D>
625644
0900>0901
626-
1734>
627645
1DC0..1DC3>
628646
1DD0..1DE6>
629647
20D0..20F0>
630648
2DE0..2DFF>
631649
A670..A672>
632650
A802>
633-
10A3F>
634651
1D165..1D166>
635652
1D242..1D244>
636653

lucene/analysis/icu/src/data/utr30/NativeDigitFolding.txt

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -452,6 +452,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
452452
10D37>0037 # HANIFI ROHINGYA DIGIT SEVEN
453453
10D38>0038 # HANIFI ROHINGYA DIGIT EIGHT
454454
10D39>0039 # HANIFI ROHINGYA DIGIT NINE
455+
10D40>0030 # GARAY DIGIT ZERO
456+
10D41>0031 # GARAY DIGIT ONE
457+
10D42>0032 # GARAY DIGIT TWO
458+
10D43>0033 # GARAY DIGIT THREE
459+
10D44>0034 # GARAY DIGIT FOUR
460+
10D45>0035 # GARAY DIGIT FIVE
461+
10D46>0036 # GARAY DIGIT SIX
462+
10D47>0037 # GARAY DIGIT SEVEN
463+
10D48>0038 # GARAY DIGIT EIGHT
464+
10D49>0039 # GARAY DIGIT NINE
455465
10E60>0031 # RUMI DIGIT ONE
456466
10E61>0032 # RUMI DIGIT TWO
457467
10E62>0033 # RUMI DIGIT THREE
@@ -560,6 +570,26 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
560570
116C7>0037 # TAKRI DIGIT SEVEN
561571
116C8>0038 # TAKRI DIGIT EIGHT
562572
116C9>0039 # TAKRI DIGIT NINE
573+
116D0>0030 # MYANMAR PAO DIGIT ZERO
574+
116D1>0031 # MYANMAR PAO DIGIT ONE
575+
116D2>0032 # MYANMAR PAO DIGIT TWO
576+
116D3>0033 # MYANMAR PAO DIGIT THREE
577+
116D4>0034 # MYANMAR PAO DIGIT FOUR
578+
116D5>0035 # MYANMAR PAO DIGIT FIVE
579+
116D6>0036 # MYANMAR PAO DIGIT SIX
580+
116D7>0037 # MYANMAR PAO DIGIT SEVEN
581+
116D8>0038 # MYANMAR PAO DIGIT EIGHT
582+
116D9>0039 # MYANMAR PAO DIGIT NINE
583+
116DA>0030 # MYANMAR EASTERN PWO KAREN DIGIT ZERO
584+
116DB>0031 # MYANMAR EASTERN PWO KAREN DIGIT ONE
585+
116DC>0032 # MYANMAR EASTERN PWO KAREN DIGIT TWO
586+
116DD>0033 # MYANMAR EASTERN PWO KAREN DIGIT THREE
587+
116DE>0034 # MYANMAR EASTERN PWO KAREN DIGIT FOUR
588+
116DF>0035 # MYANMAR EASTERN PWO KAREN DIGIT FIVE
589+
116E0>0036 # MYANMAR EASTERN PWO KAREN DIGIT SIX
590+
116E1>0037 # MYANMAR EASTERN PWO KAREN DIGIT SEVEN
591+
116E2>0038 # MYANMAR EASTERN PWO KAREN DIGIT EIGHT
592+
116E3>0039 # MYANMAR EASTERN PWO KAREN DIGIT NINE
563593
11730>0030 # AHOM DIGIT ZERO
564594
11731>0031 # AHOM DIGIT ONE
565595
11732>0032 # AHOM DIGIT TWO
@@ -590,6 +620,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
590620
11957>0037 # DIVES AKURU DIGIT SEVEN
591621
11958>0038 # DIVES AKURU DIGIT EIGHT
592622
11959>0039 # DIVES AKURU DIGIT NINE
623+
11BF0>0030 # SUNUWAR DIGIT ZERO
624+
11BF1>0031 # SUNUWAR DIGIT ONE
625+
11BF2>0032 # SUNUWAR DIGIT TWO
626+
11BF3>0033 # SUNUWAR DIGIT THREE
627+
11BF4>0034 # SUNUWAR DIGIT FOUR
628+
11BF5>0035 # SUNUWAR DIGIT FIVE
629+
11BF6>0036 # SUNUWAR DIGIT SIX
630+
11BF7>0037 # SUNUWAR DIGIT SEVEN
631+
11BF8>0038 # SUNUWAR DIGIT EIGHT
632+
11BF9>0039 # SUNUWAR DIGIT NINE
593633
11C50>0030 # BHAIKSUKI DIGIT ZERO
594634
11C51>0031 # BHAIKSUKI DIGIT ONE
595635
11C52>0032 # BHAIKSUKI DIGIT TWO
@@ -630,6 +670,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
630670
11F57>0037 # KAWI DIGIT SEVEN
631671
11F58>0038 # KAWI DIGIT EIGHT
632672
11F59>0039 # KAWI DIGIT NINE
673+
16130>0030 # GURUNG KHEMA DIGIT ZERO
674+
16131>0031 # GURUNG KHEMA DIGIT ONE
675+
16132>0032 # GURUNG KHEMA DIGIT TWO
676+
16133>0033 # GURUNG KHEMA DIGIT THREE
677+
16134>0034 # GURUNG KHEMA DIGIT FOUR
678+
16135>0035 # GURUNG KHEMA DIGIT FIVE
679+
16136>0036 # GURUNG KHEMA DIGIT SIX
680+
16137>0037 # GURUNG KHEMA DIGIT SEVEN
681+
16138>0038 # GURUNG KHEMA DIGIT EIGHT
682+
16139>0039 # GURUNG KHEMA DIGIT NINE
633683
16A60>0030 # MRO DIGIT ZERO
634684
16A61>0031 # MRO DIGIT ONE
635685
16A62>0032 # MRO DIGIT TWO
@@ -660,6 +710,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
660710
16B57>0037 # PAHAWH HMONG DIGIT SEVEN
661711
16B58>0038 # PAHAWH HMONG DIGIT EIGHT
662712
16B59>0039 # PAHAWH HMONG DIGIT NINE
713+
16D70>0030 # KIRAT RAI DIGIT ZERO
714+
16D71>0031 # KIRAT RAI DIGIT ONE
715+
16D72>0032 # KIRAT RAI DIGIT TWO
716+
16D73>0033 # KIRAT RAI DIGIT THREE
717+
16D74>0034 # KIRAT RAI DIGIT FOUR
718+
16D75>0035 # KIRAT RAI DIGIT FIVE
719+
16D76>0036 # KIRAT RAI DIGIT SIX
720+
16D77>0037 # KIRAT RAI DIGIT SEVEN
721+
16D78>0038 # KIRAT RAI DIGIT EIGHT
722+
16D79>0039 # KIRAT RAI DIGIT NINE
663723
1E140>0030 # NYIAKENG PUACHUE HMONG DIGIT ZERO
664724
1E141>0031 # NYIAKENG PUACHUE HMONG DIGIT ONE
665725
1E142>0032 # NYIAKENG PUACHUE HMONG DIGIT TWO
@@ -690,6 +750,16 @@ ABF9>0039 # MEETEI MAYEK DIGIT NINE
690750
1E4F7>0037 # NAG MUNDARI DIGIT SEVEN
691751
1E4F8>0038 # NAG MUNDARI DIGIT EIGHT
692752
1E4F9>0039 # NAG MUNDARI DIGIT NINE
753+
1E5F1>0030 # OL ONAL DIGIT ZERO
754+
1E5F2>0031 # OL ONAL DIGIT ONE
755+
1E5F3>0032 # OL ONAL DIGIT TWO
756+
1E5F4>0033 # OL ONAL DIGIT THREE
757+
1E5F5>0034 # OL ONAL DIGIT FOUR
758+
1E5F6>0035 # OL ONAL DIGIT FIVE
759+
1E5F7>0036 # OL ONAL DIGIT SIX
760+
1E5F8>0037 # OL ONAL DIGIT SEVEN
761+
1E5F9>0038 # OL ONAL DIGIT EIGHT
762+
1E5FA>0039 # OL ONAL DIGIT NINE
693763
1E950>0030 # ADLAM DIGIT ZERO
694764
1E951>0031 # ADLAM DIGIT ONE
695765
1E952>0032 # ADLAM DIGIT TWO

lucene/analysis/icu/src/data/utr30/nfc.txt

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#
1010
# Complete data for Unicode NFC normalization.
1111

12-
* Unicode 15.1.0
12+
* Unicode 16.0.0
1313

1414
# Canonical_Combining_Class (ccc) values
1515
0300..0314:230
@@ -132,7 +132,7 @@
132132
0825..0827:230
133133
0829..082D:230
134134
0859..085B:220
135-
0898:230
135+
0897..0898:230
136136
0899..089B:220
137137
089C..089F:230
138138
08CA..08CE:230
@@ -319,6 +319,7 @@ FE2E..FE2F:230
319319
10AE5:230
320320
10AE6:220
321321
10D24..10D27:230
322+
10D69..10D6D:230
322323
10EAB..10EAC:230
323324
10EFD..10EFF:220
324325
10F46..10F47:220
@@ -348,6 +349,7 @@ FE2E..FE2F:230
348349
1134D:9
349350
11366..1136C:230
350351
11370..11374:230
352+
113CE..113D0:9
351353
11442:9
352354
11446:7
353355
1145E:230
@@ -372,6 +374,7 @@ FE2E..FE2F:230
372374
11D44..11D45:9
373375
11D97:9
374376
11F41..11F42:9
377+
1612F:9
375378
16AF0..16AF4:1
376379
16B30..16B36:230
377380
16FF0..16FF1:6
@@ -397,6 +400,8 @@ FE2E..FE2F:230
397400
1E4EC..1E4ED:232
398401
1E4EE:220
399402
1E4EF:230
403+
1E5EE:230
404+
1E5EF:220
400405
1E8D0..1E8D6:220
401406
1E944..1E949:230
402407
1E94A:7
@@ -1895,19 +1900,39 @@ FB4B>05D5 05B9
18951900
FB4C>05D1 05BF
18961901
FB4D>05DB 05BF
18971902
FB4E>05E4 05BF
1903+
105C9>105D2 0307 # one-way: diacritic 0307
1904+
105E4>105DA 0307 # one-way: diacritic 0307
18981905
1109A>11099 110BA # one-way: diacritic 110BA
18991906
1109C>1109B 110BA # one-way: diacritic 110BA
19001907
110AB>110A5 110BA # one-way: diacritic 110BA
19011908
1112E=11131 11127
19021909
1112F=11132 11127
19031910
1134B=11347 1133E
19041911
1134C=11347 11357
1912+
11383=11382 113C9
1913+
11385=11384 113BB
1914+
1138E=1138B 113C2
1915+
11391=11390 113C9
1916+
113C5=113C2 113C2
1917+
113C7=113C2 113B8
1918+
113C8=113C2 113C9
19051919
114BB=114B9 114BA
19061920
114BC=114B9 114B0
19071921
114BE=114B9 114BD
19081922
115BA=115B8 115AF
19091923
115BB=115B9 115AF
19101924
11938=11935 11930
1925+
16121=1611E 1611E
1926+
16122=1611E 16129
1927+
16123=1611E 1611F
1928+
16124=16129 1611F
1929+
16125=1611E 16120
1930+
16126=16121 1611F
1931+
16127=16122 1611F
1932+
16128=16121 16120
1933+
16D68=16D67 16D67
1934+
16D69=16D63 16D67
1935+
16D6A=16D69 16D67
19111936
1D15E>1D157 1D165
19121937
1D15F>1D158 1D165
19131938
1D160>1D15F 1D16E

0 commit comments

Comments
 (0)