25
25
import java .util .HashSet ;
26
26
import java .util .Iterator ;
27
27
import java .util .List ;
28
+ import java .util .Locale ;
28
29
import java .util .Set ;
30
+ import java .util .stream .Collectors ;
31
+
29
32
import org .apache .lucene .tests .util .LuceneTestCase ;
30
33
import org .apache .lucene .tests .util .TestUtil ;
31
34
import org .apache .lucene .tests .util .automaton .AutomatonTestUtil ;
@@ -42,16 +45,25 @@ public void testBasic() throws Exception {
42
45
List <BytesRef > terms = basicTerms ();
43
46
Collections .sort (terms );
44
47
45
- Automaton a = build (terms , false );
48
+ Automaton a = build (terms , false , false );
46
49
checkAutomaton (terms , a , false );
47
50
checkMinimized (a );
48
51
}
49
52
53
// Case-insensitive variant of the basic test: builds an automaton from the sorted
// basic terms with asBinary=false and caseInsensitive=true, then verifies it through
// the four-argument checkAutomaton (caseInsensitive=true) and checkMinimized.
+ public void testCaseInsensitive () throws Exception {
54
+ List <BytesRef > terms = basicTerms ();
55
// Terms are sorted before being handed to build().
+ Collections .sort (terms );
56
+
57
// Arguments are (terms, asBinary=false, caseInsensitive=true).
+ Automaton a = build (terms , false , true );
58
// Arguments are (expected, automaton, isBinary=false, caseInsensitive=true).
+ checkAutomaton (terms , a , false , true );
59
+ checkMinimized (a );
60
+ }
61
+
50
62
// Builds an automaton from the sorted basic terms in binary mode (asBinary=true) and
// verifies it with checkAutomaton (isBinary=true) and checkMinimized.
public void testBasicBinary () throws Exception {
51
63
List <BytesRef > terms = basicTerms ();
52
64
Collections .sort (terms );
53
65
54
// The diff replaces the two-argument build call with the three-argument form;
// the added third argument (caseInsensitive) is false here.
- Automaton a = build (terms , true );
66
+ Automaton a = build (terms , true , false );
55
67
checkAutomaton (terms , a , true );
56
68
checkMinimized (a );
57
69
}
@@ -79,7 +91,52 @@ public void testRandomMinimized() throws Exception {
79
91
Automaton expected =
80
92
MinimizationOperations .minimize (
81
93
Operations .union (automatonList ), Operations .DEFAULT_DETERMINIZE_WORK_LIMIT );
82
- Automaton actual = build (sortedTerms , buildBinary );
94
+ Automaton actual = build (sortedTerms , buildBinary , false );
95
+ assertSameAutomaton (expected , actual );
96
+ }
97
+ }
98
+
99
// Randomized check of case-insensitive building: for each iteration, a reference
// automaton is assembled by hand from random strings (each code point unioned with its
// case variants), minimized, and compared via assertSameAutomaton against the automaton
// returned by build(sortedTerms, false, true).
+ public void testRandomMinimizedCaseInsensitive () throws Exception {
100
// 20 iterations on nightly runs, 5 otherwise.
+ int iters = RandomizedTest .isNightly () ? 20 : 5 ;
101
+ for (int i = 0 ; i < iters ; i ++) {
102
+ int size = random ().nextInt (2 , 50 );
103
// terms is a Set, so repeated random strings collapse to one entry; automatonList
// still receives one automaton per generated string.
+ Set <BytesRef > terms = new HashSet <>();
104
+ List <Automaton > automatonList = new ArrayList <>(size );
105
+ for (int j = 0 ; j < size ; j ++) {
106
// NOTE(review): the 8 is presumably a maximum length for the random string - confirm
// against TestUtil.randomRealisticUnicodeString.
+ String s = TestUtil .randomRealisticUnicodeString (random (), 8 );
107
+ terms .add (newBytesRef (s ));
108
// One automaton per code point of s, each accepting the code point or a case variant.
+ List <Automaton > charAutomata = s .codePoints ().mapToObj (c -> {
109
+ List <Automaton > caseAutomata = new ArrayList <>();
110
// The original code point is always accepted.
+ caseAutomata .add (Automata .makeChar (c ));
111
+ int [] alternates = CaseFolding .lookupAlternates (c );
112
// When CaseFolding returns alternates, every one of them is added.
+ if (alternates != null ) {
113
+ for (int alt : alternates ) {
114
+ caseAutomata .add (Automata .makeChar (alt ));
115
+ }
116
// Otherwise fall back to java.lang.Character: upper-case form for lower-case code
// points, lower-case form for everything else.
+ } else {
117
+ int altCase =
118
+ Character .isLowerCase (c )
119
+ ? Character .toUpperCase (c )
120
+ : Character .toLowerCase (c );
121
+
122
// Only add the variant when it differs from the original code point.
+ if (altCase != c ) {
123
+ caseAutomata .add (Automata .makeChar (altCase ));
124
+ }
125
+ }
126
+ return Operations .union (caseAutomata );
127
+ }).collect (Collectors .toList ());
128
// A string with no code points is represented by the empty-string automaton rather
// than by concatenating an empty list.
+ if (charAutomata .isEmpty ()) {
129
+ automatonList .add (Automata .makeEmptyString ());
130
+ } else {
131
+ automatonList .add (Operations .concatenate (charAutomata ));
132
+ }
133
+ }
134
// build() receives the terms in sorted order.
+ List <BytesRef > sortedTerms = terms .stream ().sorted ().toList ();
135
+
136
// Reference result: minimized union of all hand-built per-string automata.
+ Automaton expected =
137
+ MinimizationOperations .minimize (
138
+ Operations .union (automatonList ), Operations .DEFAULT_DETERMINIZE_WORK_LIMIT );
139
// Arguments are (terms, asBinary=false, caseInsensitive=true).
+ Automaton actual = build (sortedTerms , false , true );
83
140
assertSameAutomaton (expected , actual );
84
141
}
85
142
}
@@ -98,7 +155,7 @@ public void testLargeTerms() throws Exception {
98
155
IllegalArgumentException e =
99
156
expectThrows (
100
157
IllegalArgumentException .class ,
101
- () -> build (Collections .singleton (new BytesRef (b10k )), false ));
158
+ () -> build (Collections .singleton (new BytesRef (b10k )), false , false ));
102
159
assertTrue (
103
160
e .getMessage ()
104
161
.startsWith (
@@ -107,7 +164,7 @@ public void testLargeTerms() throws Exception {
107
164
+ " UTF-8 bytes" ));
108
165
109
166
byte [] b1k = ArrayUtil .copyOfSubArray (b10k , 0 , 1000 );
110
- build (Collections .singleton (new BytesRef (b1k )), false ); // no exception
167
+ build (Collections .singleton (new BytesRef (b1k )), false , false ); // no exception
111
168
}
112
169
113
170
private void testRandom (boolean allowBinary ) throws Exception {
@@ -125,12 +182,16 @@ private void testRandom(boolean allowBinary) throws Exception {
125
182
}
126
183
127
184
List <BytesRef > sorted = terms .stream ().sorted ().toList ();
128
- Automaton a = build (sorted , allowBinary );
185
+ Automaton a = build (sorted , allowBinary , false );
129
186
checkAutomaton (sorted , a , allowBinary );
130
187
}
131
188
}
132
189
133
190
// Three-argument convenience overload: forwards to the four-argument checkAutomaton
// with caseInsensitive=false.
private void checkAutomaton (List <BytesRef > expected , Automaton a , boolean isBinary ) {
191
+ checkAutomaton (expected , a , isBinary , false );
192
+ }
193
+
194
+ private void checkAutomaton (List <BytesRef > expected , Automaton a , boolean isBinary , boolean caseInsensitive ) {
134
195
CompiledAutomaton c = new CompiledAutomaton (a , true , false , isBinary );
135
196
ByteRunAutomaton runAutomaton = c .runAutomaton ;
136
197
@@ -141,12 +202,14 @@ private void checkAutomaton(List<BytesRef> expected, Automaton a, boolean isBina
141
202
readable + " should be found but wasn't" , runAutomaton .run (t .bytes , t .offset , t .length ));
142
203
}
143
204
144
- // Make sure every term produced by the automaton is expected
145
- BytesRefBuilder scratch = new BytesRefBuilder ();
146
- FiniteStringsIterator it = new FiniteStringsIterator (c .automaton );
147
- for (IntsRef r = it .next (); r != null ; r = it .next ()) {
148
- BytesRef t = Util .toBytesRef (r , scratch );
149
- assertTrue (expected .contains (t ));
205
+ if (caseInsensitive == false ) {
206
+ // Make sure every term produced by the automaton is expected
207
+ BytesRefBuilder scratch = new BytesRefBuilder ();
208
+ FiniteStringsIterator it = new FiniteStringsIterator (c .automaton );
209
+ for (IntsRef r = it .next (); r != null ; r = it .next ()) {
210
+ BytesRef t = Util .toBytesRef (r , scratch );
211
+ assertTrue (expected .contains (t ));
212
+ }
150
213
}
151
214
}
152
215
@@ -172,11 +235,11 @@ private List<BytesRef> basicTerms() {
172
235
return terms ;
173
236
}
174
237
175
// Test helper that builds an automaton from the given terms via StringsToAutomaton.build,
// forwarding the asBinary and caseInsensitive flags unchanged. The diff adds the
// caseInsensitive parameter to the signature and to both delegate calls.
- private Automaton build (Collection <BytesRef > terms , boolean asBinary ) throws IOException {
238
+ private Automaton build (Collection <BytesRef > terms , boolean asBinary , boolean caseInsensitive ) throws IOException {
176
239
// Randomly picks between the Collection-based overload and the TermIterator-based
// overload (presumably so both entry points get exercised across test runs).
if (random ().nextBoolean ()) {
177
- return StringsToAutomaton .build (terms , asBinary );
240
+ return StringsToAutomaton .build (terms , asBinary , caseInsensitive );
178
241
} else {
179
- return StringsToAutomaton .build (new TermIterator (terms ), asBinary );
242
+ return StringsToAutomaton .build (new TermIterator (terms ), asBinary , caseInsensitive );
180
243
}
181
244
}
182
245
0 commit comments