Skip to content

Commit 92de7a2

Browse files
committed
Keep evaluating conjunctions one doc-at-a-time until dynamic pruning kicks in. (#14739)
This essentially reverts the change from #14701 for conjunctive queries that have not yet reached their `totalHitsThreshold`. This should speed up queries whose total number of matches is on the order of `totalHitsThreshold` or less, such as the filtered conjunctions in the nightly benchmarks.
1 parent 0386d68 commit 92de7a2

File tree

1 file changed

+55
-11
lines changed

1 file changed

+55
-11
lines changed

lucene/core/src/java/org/apache/lucene/search/BlockMaxConjunctionBulkScorer.java

Lines changed: 55 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -87,18 +87,64 @@ public int score(LeafCollector collector, Bits acceptDocs, int min, int max) thr
8787
// NOTE: windowMax is inclusive
8888
int windowMax = Math.min(scorers[0].advanceShallow(windowMin), max - 1);
8989

90-
float maxWindowScore = Float.POSITIVE_INFINITY;
9190
if (0 < scorable.minCompetitiveScore) {
92-
maxWindowScore = computeMaxScore(windowMin, windowMax);
91+
float maxWindowScore = computeMaxScore(windowMin, windowMax);
92+
scoreWindowScoreFirst(collector, acceptDocs, windowMin, windowMax + 1, maxWindowScore);
93+
} else {
94+
scoreWindowDocFirst(collector, acceptDocs, windowMin, windowMax + 1);
9395
}
94-
scoreWindow(collector, acceptDocs, windowMin, windowMax + 1, maxWindowScore);
9596
windowMin = Math.max(lead.docID(), windowMax + 1);
9697
}
9798

9899
return windowMin >= maxDoc ? DocIdSetIterator.NO_MORE_DOCS : windowMin;
99100
}
100101

101-
private void scoreWindow(
102+
/**
103+
* Score a window of doc IDs by first finding agreement between all iterators, and only then
104+
* compute scores and call the collector.
105+
*/
106+
private void scoreWindowDocFirst(LeafCollector collector, Bits acceptDocs, int min, int max)
107+
throws IOException {
108+
int doc = lead.docID();
109+
if (doc < min) {
110+
doc = lead.advance(min);
111+
}
112+
113+
outer:
114+
while (doc < max) {
115+
if (acceptDocs == null || acceptDocs.get(doc)) {
116+
for (int i = 1; i < iterators.length; ++i) {
117+
DocIdSetIterator iterator = iterators[i];
118+
int otherDoc = iterator.docID();
119+
if (otherDoc < doc) {
120+
otherDoc = iterator.advance(doc);
121+
}
122+
if (doc != otherDoc) {
123+
doc = lead.advance(otherDoc);
124+
continue outer;
125+
}
126+
}
127+
128+
double score = 0;
129+
for (Scorable scorable : scorables) {
130+
score += scorable.score();
131+
}
132+
scorable.score = (float) score;
133+
collector.collect(doc);
134+
}
135+
doc = lead.nextDoc();
136+
}
137+
}
138+
139+
/**
140+
* Score a window of doc IDs by computing matches and scores on the lead costly clause, then
141+
* iterate other clauses one by one to remove documents that do not match and increase the global
142+
* score by the score of the current clause. This is often faster when a minimum competitive score
143+
* is set, as score computations can be more efficient (e.g. thanks to vectorization) and because
144+
* we can skip advancing other clauses if the global score so far is not high enough for a doc to
145+
* have a chance of being competitive.
146+
*/
147+
private void scoreWindowScoreFirst(
102148
LeafCollector collector, Bits acceptDocs, int min, int max, float maxWindowScore)
103149
throws IOException {
104150
if (maxWindowScore < scorable.minCompetitiveScore) {
@@ -120,13 +166,11 @@ private void scoreWindow(
120166
docAndScoreAccBuffer.copyFrom(docAndScoreBuffer);
121167

122168
for (int i = 1; i < scorers.length; ++i) {
123-
if (scorable.minCompetitiveScore > 0) {
124-
ScorerUtil.filterCompetitiveHits(
125-
docAndScoreAccBuffer,
126-
sumOfOtherClauses[i],
127-
scorable.minCompetitiveScore,
128-
scorers.length);
129-
}
169+
ScorerUtil.filterCompetitiveHits(
170+
docAndScoreAccBuffer,
171+
sumOfOtherClauses[i],
172+
scorable.minCompetitiveScore,
173+
scorers.length);
130174

131175
ScorerUtil.applyRequiredClause(docAndScoreAccBuffer, iterators[i], scorables[i]);
132176
}

0 commit comments

Comments
 (0)