Skip to content

Commit e865d00

Browse files
committed
Optimize AbstractKnnVectorQuery#createBitSet with intoBitset (#14674)
1 parent 3606763 commit e865d00

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,8 @@ Optimizations
6565

6666
* GITHUB#14709: Speed up TermQuery by using Scorer#nextDocsAndScores. (Guo Feng)
6767

68+
* GITHUB#14674: Optimize filter bitset creation in AbstractKnnVectorQuery#createBitSet and AbstractVectorSimilarityQuery with DocIdSetIterator#intoBitSet and Bits#applyMask. (Guo Feng)
69+
6870
Bug Fixes
6971
---------------------
7072
* GITHUB#14654: ValueSource.fromDoubleValuesSource(dvs).getSortField() would throw errors when

lucene/core/src/java/org/apache/lucene/search/AbstractKnnVectorQuery.java

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
import org.apache.lucene.util.BitSet;
3838
import org.apache.lucene.util.BitSetIterator;
3939
import org.apache.lucene.util.Bits;
40+
import org.apache.lucene.util.FixedBitSet;
4041

4142
/**
4243
* Uses {@link KnnVectorsReader#search} to perform nearest neighbour search.
@@ -171,15 +172,25 @@ private BitSet createBitSet(DocIdSetIterator iterator, Bits liveDocs, int maxDoc
171172
// If we already have a BitSet and no deletions, reuse the BitSet
172173
return bitSetIterator.getBitSet();
173174
} else {
174-
// Create a new BitSet from matching and live docs
175-
FilteredDocIdSetIterator filterIterator =
176-
new FilteredDocIdSetIterator(iterator) {
177-
@Override
178-
protected boolean match(int doc) {
179-
return liveDocs == null || liveDocs.get(doc);
180-
}
181-
};
182-
return BitSet.of(filterIterator, maxDoc);
175+
int threshold = maxDoc >> 7; // same as BitSet#of
176+
if (iterator.cost() >= threshold) {
177+
// take advantage of Disi#intoBitset and Bits#applyMask
178+
FixedBitSet bitSet = new FixedBitSet(maxDoc);
179+
bitSet.or(iterator);
180+
if (liveDocs != null) {
181+
liveDocs.applyMask(bitSet, 0);
182+
}
183+
return bitSet;
184+
} else {
185+
FilteredDocIdSetIterator filterIterator =
186+
new FilteredDocIdSetIterator(iterator) {
187+
@Override
188+
protected boolean match(int doc) {
189+
return liveDocs == null || liveDocs.get(doc);
190+
}
191+
};
192+
return BitSet.of(filterIterator, maxDoc); // create a sparse bitset
193+
}
183194
}
184195
}
185196

lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.lucene.util.BitSet;
3030
import org.apache.lucene.util.BitSetIterator;
3131
import org.apache.lucene.util.Bits;
32+
import org.apache.lucene.util.FixedBitSet;
3233

3334
/**
3435
* Search for all (approximate) vectors above a similarity threshold.
@@ -142,14 +143,27 @@ public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOExcepti
142143
acceptDocs = bitSetIterator.getBitSet();
143144
} else {
144145
// Else collect all matching docs
145-
FilteredDocIdSetIterator filtered =
146-
new FilteredDocIdSetIterator(scorer.iterator()) {
147-
@Override
148-
protected boolean match(int doc) {
149-
return liveDocs == null || liveDocs.get(doc);
150-
}
151-
};
152-
acceptDocs = BitSet.of(filtered, leafReader.maxDoc());
146+
DocIdSetIterator iterator = scorer.iterator();
147+
final int maxDoc = leafReader.maxDoc();
148+
int threshold = maxDoc >> 7; // same as BitSet#of
149+
if (iterator.cost() >= threshold) {
150+
// take advantage of Disi#intoBitset and Bits#applyMask
151+
FixedBitSet bitSet = new FixedBitSet(maxDoc);
152+
bitSet.or(iterator);
153+
if (liveDocs != null) {
154+
liveDocs.applyMask(bitSet, 0);
155+
}
156+
acceptDocs = bitSet;
157+
} else {
158+
FilteredDocIdSetIterator filterIterator =
159+
new FilteredDocIdSetIterator(iterator) {
160+
@Override
161+
protected boolean match(int doc) {
162+
return liveDocs == null || liveDocs.get(doc);
163+
}
164+
};
165+
acceptDocs = BitSet.of(filterIterator, maxDoc); // create a sparse bitset
166+
}
153167
}
154168

155169
int cardinality = acceptDocs.cardinality();

0 commit comments

Comments
 (0)