27
27
* BulkScorer implementation of {@link ConjunctionScorer} that is specialized for dense clauses.
28
28
* Whenever sensible, it intersects clauses by loading their matches into a bit set and computing
29
29
* the intersection of clauses by and-ing these bit sets.
30
- *
31
- * <p>An empty set of iterators is interpreted as meaning that all docs in [0, maxDoc) match.
32
30
*/
33
31
final class DenseConjunctionBulkScorer extends BulkScorer {
34
32
@@ -46,12 +44,16 @@ final class DenseConjunctionBulkScorer extends BulkScorer {
46
44
47
45
private final FixedBitSet windowMatches = new FixedBitSet (WINDOW_SIZE );
48
46
private final FixedBitSet clauseWindowMatches = new FixedBitSet (WINDOW_SIZE );
47
+ private final List <DocIdSetIterator > windowIterators = new ArrayList <>();
49
48
private final DocIdStreamView docIdStreamView = new DocIdStreamView ();
50
49
private final RangeDocIdStream rangeDocIdStream = new RangeDocIdStream ();
51
50
private final SingleIteratorDocIdStream singleIteratorDocIdStream =
52
51
new SingleIteratorDocIdStream ();
53
52
54
53
DenseConjunctionBulkScorer (List <DocIdSetIterator > iterators , int maxDoc , float constantScore ) {
54
+ if (iterators .isEmpty ()) {
55
+ throw new IllegalArgumentException ("Expected one or more iterators, got 0" );
56
+ }
55
57
this .maxDoc = maxDoc ;
56
58
iterators = new ArrayList <>(iterators );
57
59
iterators .sort (Comparator .comparingLong (DocIdSetIterator ::cost ));
@@ -63,6 +65,7 @@ final class DenseConjunctionBulkScorer extends BulkScorer {
63
65
@ Override
64
66
public int score (LeafCollector collector , Bits acceptDocs , int min , int max ) throws IOException {
65
67
collector .setScorer (scorable );
68
+
66
69
List <DocIdSetIterator > iterators = this .iterators ;
67
70
if (collector .competitiveIterator () != null ) {
68
71
iterators = new ArrayList <>(iterators );
@@ -75,37 +78,24 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
75
78
76
79
max = Math .min (max , maxDoc );
77
80
78
- DocIdSetIterator lead = null ;
79
- if (iterators .isEmpty () == false ) {
80
- lead = iterators .get (0 );
81
- if (lead .docID () < min ) {
82
- min = lead .advance (min );
83
- }
81
+ DocIdSetIterator lead = iterators .get (0 );
82
+ if (lead .docID () < min ) {
83
+ min = lead .advance (min );
84
84
}
85
85
86
- if (min >= max ) {
87
- return min >= maxDoc ? DocIdSetIterator .NO_MORE_DOCS : min ;
88
- }
89
-
90
- int windowMax = min ;
91
- do {
86
+ while (min < max ) {
92
87
if (scorable .minCompetitiveScore > scorable .score ) {
93
88
return DocIdSetIterator .NO_MORE_DOCS ;
94
89
}
90
+ min = scoreWindow (collector , acceptDocs , iterators , min , max );
91
+ }
95
92
96
- int windowBase = lead == null ? windowMax : lead .docID ();
97
- windowMax = (int ) Math .min (max , (long ) windowBase + WINDOW_SIZE );
98
- if (windowMax > windowBase ) {
99
- scoreWindowUsingBitSet (collector , acceptDocs , iterators , windowBase , windowMax );
100
- }
101
- } while (windowMax < max );
102
-
103
- if (lead != null ) {
93
+ if (lead .docID () > max ) {
104
94
return lead .docID ();
105
- } else if (windowMax >= maxDoc ) {
95
+ } else if (max >= maxDoc ) {
106
96
return DocIdSetIterator .NO_MORE_DOCS ;
107
97
} else {
108
- return windowMax ;
98
+ return max ;
109
99
}
110
100
}
111
101
@@ -117,6 +107,65 @@ private static int advance(FixedBitSet set, int i) {
117
107
}
118
108
}
119
109
110
+ private int scoreWindow (
111
+ LeafCollector collector , Bits acceptDocs , List <DocIdSetIterator > iterators , int min , int max )
112
+ throws IOException {
113
+
114
+ // Advance all iterators to the first doc that is greater than or equal to min. This is
115
+ // important as this is the only place where we can take advantage of a large gap between
116
+ // consecutive matches in any clause.
117
+ for (DocIdSetIterator iterator : iterators ) {
118
+ if (iterator .docID () >= min ) {
119
+ min = iterator .docID ();
120
+ } else {
121
+ min = iterator .advance (min );
122
+ }
123
+ }
124
+ if (min >= max ) {
125
+ return min ;
126
+ }
127
+
128
+ if (acceptDocs == null ) {
129
+ int minDocIDRunEnd = max ;
130
+ for (DocIdSetIterator iterator : iterators ) {
131
+ if (iterator .docID () > min ) {
132
+ minDocIDRunEnd = min ;
133
+ break ;
134
+ } else {
135
+ minDocIDRunEnd = Math .min (minDocIDRunEnd , iterator .docIDRunEnd ());
136
+ }
137
+ }
138
+
139
+ if (minDocIDRunEnd - min >= WINDOW_SIZE / 2 ) {
140
+ // We have a large range of doc IDs that all match.
141
+ rangeDocIdStream .from = min ;
142
+ rangeDocIdStream .to = minDocIDRunEnd ;
143
+ collector .collect (rangeDocIdStream );
144
+ return minDocIDRunEnd ;
145
+ }
146
+ }
147
+
148
+ int bitsetWindowMax = (int ) Math .min (max , (long ) min + WINDOW_SIZE );
149
+
150
+ for (DocIdSetIterator it : iterators ) {
151
+ if (it .docID () > min || it .docIDRunEnd () < bitsetWindowMax ) {
152
+ windowIterators .add (it );
153
+ }
154
+ }
155
+
156
+ if (acceptDocs == null && windowIterators .size () == 1 ) {
157
+ // We have a range of doc IDs where all matches of an iterator are matches of the conjunction.
158
+ singleIteratorDocIdStream .iterator = windowIterators .get (0 );
159
+ singleIteratorDocIdStream .from = min ;
160
+ singleIteratorDocIdStream .to = bitsetWindowMax ;
161
+ collector .collect (singleIteratorDocIdStream );
162
+ } else {
163
+ scoreWindowUsingBitSet (collector , acceptDocs , windowIterators , min , bitsetWindowMax );
164
+ }
165
+ windowIterators .clear ();
166
+ return bitsetWindowMax ;
167
+ }
168
+
120
169
private void scoreWindowUsingBitSet (
121
170
LeafCollector collector ,
122
171
Bits acceptDocs ,
@@ -128,26 +177,14 @@ private void scoreWindowUsingBitSet(
128
177
assert windowMatches .scanIsEmpty ();
129
178
assert clauseWindowMatches .scanIsEmpty ();
130
179
131
- if (acceptDocs == null ) {
132
- if (iterators .isEmpty ()) {
133
- // All docs in the range match.
134
- rangeDocIdStream .from = windowBase ;
135
- rangeDocIdStream .to = windowMax ;
136
- collector .collect (rangeDocIdStream );
137
- return ;
138
- } else if (iterators .size () == 1 ) {
139
- singleIteratorDocIdStream .iterator = iterators .get (0 );
140
- singleIteratorDocIdStream .from = windowBase ;
141
- singleIteratorDocIdStream .to = windowMax ;
142
- collector .collect (singleIteratorDocIdStream );
143
- return ;
144
- }
145
- }
146
-
147
180
if (iterators .isEmpty ()) {
181
+ // This happens if all clauses fully matched the window and there are deleted docs.
148
182
windowMatches .set (0 , windowMax - windowBase );
149
183
} else {
150
184
DocIdSetIterator lead = iterators .get (0 );
185
+ if (lead .docID () < windowBase ) {
186
+ lead .advance (windowBase );
187
+ }
151
188
lead .intoBitSet (windowMax , windowMatches , windowBase );
152
189
}
153
190
@@ -199,28 +236,11 @@ private void scoreWindowUsingBitSet(
199
236
docIdStreamView .windowBase = windowBase ;
200
237
collector .collect (docIdStreamView );
201
238
windowMatches .clear ();
202
-
203
- // If another clause is more advanced than the leading clause then advance the leading clause,
204
- // it's important to take advantage of large gaps in the postings lists of other clauses.
205
- if (iterators .size () >= 2 ) {
206
- DocIdSetIterator lead = iterators .get (0 );
207
- int maxOtherDocID = -1 ;
208
- for (int i = 1 ; i < iterators .size (); ++i ) {
209
- maxOtherDocID = Math .max (maxOtherDocID , iterators .get (i ).docID ());
210
- }
211
- if (lead .docID () < maxOtherDocID ) {
212
- lead .advance (maxOtherDocID );
213
- }
214
- }
215
239
}
216
240
217
241
@ Override
218
242
public long cost () {
219
- if (iterators .isEmpty ()) {
220
- return maxDoc ;
221
- } else {
222
- return iterators .get (0 ).cost ();
223
- }
243
+ return iterators .get (0 ).cost ();
224
244
}
225
245
226
246
final class DocIdStreamView extends DocIdStream {
@@ -287,6 +307,9 @@ public int count() throws IOException {
287
307
// If the collector is just interested in the count, loading in a bit set and counting bits is
288
308
// often faster than incrementing a counter on every call to nextDoc().
289
309
assert windowMatches .scanIsEmpty ();
310
+ if (iterator .docID () < from ) {
311
+ iterator .advance (from );
312
+ }
290
313
iterator .intoBitSet (to , clauseWindowMatches , from );
291
314
int count = clauseWindowMatches .cardinality ();
292
315
clauseWindowMatches .clear ();
0 commit comments