30
30
*/
31
31
final class DenseConjunctionBulkScorer extends BulkScorer {
32
32
33
+ private record DisiWrapper (DocIdSetIterator approximation , TwoPhaseIterator twoPhase ) {
34
+ DisiWrapper (DocIdSetIterator iterator ) {
35
+ this (iterator , null );
36
+ }
37
+
38
+ DisiWrapper (TwoPhaseIterator twoPhase ) {
39
+ this (twoPhase .approximation (), twoPhase );
40
+ }
41
+
42
+ int docID () {
43
+ return approximation ().docID ();
44
+ }
45
+
46
+ int docIDRunEnd () throws IOException {
47
+ if (twoPhase () == null ) {
48
+ return approximation ().docIDRunEnd ();
49
+ } else {
50
+ return twoPhase ().docIDRunEnd ();
51
+ }
52
+ }
53
+ }
54
+
33
55
// Use a small-ish window size to make sure that we can take advantage of gaps in the postings of
34
56
// clauses that are not leading iteration.
35
57
static final int WINDOW_SIZE = 4096 ;
@@ -39,25 +61,49 @@ final class DenseConjunctionBulkScorer extends BulkScorer {
39
61
static final int DENSITY_THRESHOLD_INVERSE = Long .SIZE / 2 ;
40
62
41
63
private final int maxDoc ;
42
- private final List <DocIdSetIterator > iterators ;
64
+ private final List <DisiWrapper > iterators ;
43
65
private final SimpleScorable scorable ;
44
66
45
67
private final FixedBitSet windowMatches = new FixedBitSet (WINDOW_SIZE );
46
68
private final FixedBitSet clauseWindowMatches = new FixedBitSet (WINDOW_SIZE );
47
- private final List <DocIdSetIterator > windowIterators = new ArrayList <>();
69
+ private final List <DocIdSetIterator > windowApproximations = new ArrayList <>();
70
+ private final List <TwoPhaseIterator > windowTwoPhases = new ArrayList <>();
48
71
private final DocIdStreamView docIdStreamView = new DocIdStreamView ();
49
72
private final RangeDocIdStream rangeDocIdStream = new RangeDocIdStream ();
50
73
private final SingleIteratorDocIdStream singleIteratorDocIdStream =
51
74
new SingleIteratorDocIdStream ();
52
75
53
- DenseConjunctionBulkScorer (List <DocIdSetIterator > iterators , int maxDoc , float constantScore ) {
54
- if (iterators .isEmpty ()) {
76
+ static DenseConjunctionBulkScorer of (List <Scorer > filters , int maxDoc , float constantScore ) {
77
+ List <DocIdSetIterator > iterators = new ArrayList <>();
78
+ List <TwoPhaseIterator > twoPhases = new ArrayList <>();
79
+ for (Scorer filter : filters ) {
80
+ TwoPhaseIterator twoPhase = filter .twoPhaseIterator ();
81
+ if (twoPhase != null ) {
82
+ twoPhases .add (twoPhase );
83
+ } else {
84
+ iterators .add (filter .iterator ());
85
+ }
86
+ }
87
+ return new DenseConjunctionBulkScorer (iterators , twoPhases , maxDoc , constantScore );
88
+ }
89
+
90
/**
 * Creates a bulk scorer over the conjunction of the given clauses.
 *
 * @param iterators clauses that are plain iterators
 * @param twoPhases clauses that need a two-phase check on top of their approximation
 * @param maxDoc upper bound (exclusive) applied to the scored doc ID range
 * @param constantScore score assigned to the scorable handed to collectors
 * @throws IllegalArgumentException if both {@code iterators} and {@code twoPhases} are empty
 */
DenseConjunctionBulkScorer(
    List<DocIdSetIterator> iterators,
    List<TwoPhaseIterator> twoPhases,
    int maxDoc,
    float constantScore) {
  if (iterators.isEmpty() && twoPhases.isEmpty()) {
    throw new IllegalArgumentException("Expected one or more iterators, got 0");
  }
  this.maxDoc = maxDoc;
  // The final size is known up front, so presize the list.
  this.iterators = new ArrayList<>(iterators.size() + twoPhases.size());
  for (DocIdSetIterator iterator : iterators) {
    this.iterators.add(new DisiWrapper(iterator));
  }
  for (TwoPhaseIterator twoPhase : twoPhases) {
    this.iterators.add(new DisiWrapper(twoPhase));
  }
  // Order clauses by ascending approximation cost. comparingLong compares the primitive costs
  // directly instead of boxing each one into a Long as Comparator.comparing would.
  this.iterators.sort(Comparator.comparingLong(w -> w.approximation().cost()));
  this.scorable = new SimpleScorable();
  scorable.score = constantScore;
}
@@ -66,21 +112,21 @@ final class DenseConjunctionBulkScorer extends BulkScorer {
66
112
public int score (LeafCollector collector , Bits acceptDocs , int min , int max ) throws IOException {
67
113
collector .setScorer (scorable );
68
114
69
- List <DocIdSetIterator > iterators = this .iterators ;
115
+ List <DisiWrapper > iterators = this .iterators ;
70
116
if (collector .competitiveIterator () != null ) {
71
117
iterators = new ArrayList <>(iterators );
72
- iterators .add (collector .competitiveIterator ());
118
+ iterators .add (new DisiWrapper ( collector .competitiveIterator () ));
73
119
}
74
120
75
- for (DocIdSetIterator it : iterators ) {
76
- min = Math .max (min , it .docID ());
121
+ for (DisiWrapper w : iterators ) {
122
+ min = Math .max (min , w . approximation () .docID ());
77
123
}
78
124
79
125
max = Math .min (max , maxDoc );
80
126
81
- DocIdSetIterator lead = iterators .get (0 );
127
+ DisiWrapper lead = iterators .get (0 );
82
128
if (lead .docID () < min ) {
83
- min = lead .advance (min );
129
+ min = lead .approximation . advance (min );
84
130
}
85
131
86
132
while (min < max ) {
@@ -108,17 +154,17 @@ private static int advance(FixedBitSet set, int i) {
108
154
}
109
155
110
156
private int scoreWindow (
111
- LeafCollector collector , Bits acceptDocs , List <DocIdSetIterator > iterators , int min , int max )
157
+ LeafCollector collector , Bits acceptDocs , List <DisiWrapper > iterators , int min , int max )
112
158
throws IOException {
113
159
114
160
// Advance all iterators to the first doc that is greater than or equal to min. This is
115
161
// important as this is the only place where we can take advantage of a large gap between
116
162
// consecutive matches in any clause.
117
- for (DocIdSetIterator iterator : iterators ) {
118
- if (iterator .docID () >= min ) {
119
- min = iterator .docID ();
163
+ for (DisiWrapper w : iterators ) {
164
+ if (w .docID () >= min ) {
165
+ min = w .docID ();
120
166
} else {
121
- min = iterator .advance (min );
167
+ min = w . approximation () .advance (min );
122
168
}
123
169
if (min >= max ) {
124
170
return min ;
@@ -127,12 +173,12 @@ private int scoreWindow(
127
173
128
174
if (acceptDocs == null ) {
129
175
int minDocIDRunEnd = max ;
130
- for (DocIdSetIterator iterator : iterators ) {
131
- if (iterator .docID () > min ) {
176
+ for (DisiWrapper w : iterators ) {
177
+ if (w .docID () > min ) {
132
178
minDocIDRunEnd = min ;
133
179
break ;
134
180
} else {
135
- minDocIDRunEnd = Math .min (minDocIDRunEnd , iterator .docIDRunEnd ());
181
+ minDocIDRunEnd = Math .min (minDocIDRunEnd , w .docIDRunEnd ());
136
182
}
137
183
}
138
184
@@ -147,22 +193,34 @@ private int scoreWindow(
147
193
148
194
int bitsetWindowMax = (int ) Math .min (max , (long ) min + WINDOW_SIZE );
149
195
150
- for (DocIdSetIterator it : iterators ) {
151
- if (it .docID () > min || it .docIDRunEnd () < bitsetWindowMax ) {
152
- windowIterators .add (it );
196
+ for (DisiWrapper w : iterators ) {
197
+ if (w .docID () > min || w .docIDRunEnd () < bitsetWindowMax ) {
198
+ windowApproximations .add (w .approximation ());
199
+ if (w .twoPhase () != null ) {
200
+ windowTwoPhases .add (w .twoPhase ());
201
+ }
153
202
}
154
203
}
155
204
156
- if (acceptDocs == null && windowIterators .size () == 1 ) {
157
- // We have a range of doc IDs where all matches of an iterator are matches of the conjunction.
158
- singleIteratorDocIdStream .iterator = windowIterators .get (0 );
159
- singleIteratorDocIdStream .from = min ;
160
- singleIteratorDocIdStream .to = bitsetWindowMax ;
161
- collector .collect (singleIteratorDocIdStream );
205
+ if (windowTwoPhases .isEmpty ()) {
206
+ if (acceptDocs == null && windowApproximations .size () == 1 ) {
207
+ // We have a range of doc IDs where all matches of an iterator are matches of the
208
+ // conjunction.
209
+ singleIteratorDocIdStream .iterator = windowApproximations .get (0 );
210
+ singleIteratorDocIdStream .from = min ;
211
+ singleIteratorDocIdStream .to = bitsetWindowMax ;
212
+ collector .collect (singleIteratorDocIdStream );
213
+ } else {
214
+ scoreWindowUsingBitSet (collector , acceptDocs , windowApproximations , min , bitsetWindowMax );
215
+ }
162
216
} else {
163
- scoreWindowUsingBitSet (collector , acceptDocs , windowIterators , min , bitsetWindowMax );
217
+ windowTwoPhases .sort (Comparator .comparingDouble (TwoPhaseIterator ::matchCost ));
218
+ scoreWindowUsingLeapFrog (
219
+ collector , acceptDocs , windowApproximations , windowTwoPhases , min , bitsetWindowMax );
220
+ windowTwoPhases .clear ();
164
221
}
165
- windowIterators .clear ();
222
+ windowApproximations .clear ();
223
+
166
224
return bitsetWindowMax ;
167
225
}
168
226
@@ -238,9 +296,79 @@ private void scoreWindowUsingBitSet(
238
296
windowMatches .clear ();
239
297
}
240
298
299
+ private static void scoreWindowUsingLeapFrog (
300
+ LeafCollector collector ,
301
+ Bits acceptDocs ,
302
+ List <DocIdSetIterator > approximations ,
303
+ List <TwoPhaseIterator > twoPhases ,
304
+ int min ,
305
+ int max )
306
+ throws IOException {
307
+ assert twoPhases .size () > 0 ;
308
+ assert approximations .size () >= twoPhases .size ();
309
+
310
+ if (approximations .size () == 1 ) {
311
+ // scoreWindowUsingLeapFrog is only used if there is at least one two-phase iterator, so our
312
+ // single clause is a two-phase iterator
313
+ assert twoPhases .size () == 1 ;
314
+ DocIdSetIterator approximation = approximations .get (0 );
315
+ TwoPhaseIterator twoPhase = twoPhases .get (0 );
316
+ if (approximation .docID () < min ) {
317
+ approximation .advance (min );
318
+ }
319
+ for (int doc = approximation .docID (); doc < max ; doc = approximation .nextDoc ()) {
320
+ if ((acceptDocs == null || acceptDocs .get (doc )) && twoPhase .matches ()) {
321
+ collector .collect (doc );
322
+ }
323
+ }
324
+ } else {
325
+ DocIdSetIterator lead1 = approximations .get (0 );
326
+ DocIdSetIterator lead2 = approximations .get (1 );
327
+
328
+ if (lead1 .docID () < min ) {
329
+ lead1 .advance (min );
330
+ }
331
+
332
+ advanceHead :
333
+ for (int doc = lead1 .docID (); doc < max ; ) {
334
+ if (acceptDocs != null && acceptDocs .get (doc ) == false ) {
335
+ doc = lead1 .nextDoc ();
336
+ continue ;
337
+ }
338
+ int doc2 = lead2 .docID ();
339
+ if (doc2 < doc ) {
340
+ doc2 = lead2 .advance (doc );
341
+ }
342
+ if (doc != doc2 ) {
343
+ doc = lead1 .advance (Math .min (doc2 , max ));
344
+ continue ;
345
+ }
346
+ for (int i = 2 ; i < approximations .size (); ++i ) {
347
+ DocIdSetIterator other = approximations .get (i );
348
+ int docN = other .docID ();
349
+ if (docN < doc ) {
350
+ docN = other .advance (doc );
351
+ }
352
+ if (doc != docN ) {
353
+ doc = lead1 .advance (Math .min (docN , max ));
354
+ continue advanceHead ;
355
+ }
356
+ }
357
+ for (TwoPhaseIterator twoPhase : twoPhases ) {
358
+ if (twoPhase .matches () == false ) {
359
+ doc = lead1 .nextDoc ();
360
+ continue advanceHead ;
361
+ }
362
+ }
363
+ collector .collect (doc );
364
+ doc = lead1 .nextDoc ();
365
+ }
366
+ }
367
+ }
368
+
241
369
@ Override
242
370
public long cost () {
243
- return iterators .get (0 ).cost ();
371
+ return iterators .get (0 ).approximation (). cost ();
244
372
}
245
373
246
374
final class DocIdStreamView extends DocIdStream {
0 commit comments