Skip to content

Commit d52cefa

Browse files
Fix performance regression in ApproximatePointRangeQuery with Lucene 10.2.1 change (opensearch-project#18358)
--------- Signed-off-by: Prudhvi Godithi <[email protected]>
1 parent 6cd166f commit d52cefa

File tree

3 files changed

+312
-71
lines changed

3 files changed

+312
-71
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
7777
- Fix 'system call filter not installed' caused when network.host: 0.0.0.0 ([#18309](https://github.com/opensearch-project/OpenSearch/pull/18309))
7878
- Fix MatrixStatsAggregator reuse when mode parameter changes ([#18242](https://github.com/opensearch-project/OpenSearch/issues/18242))
7979
- Replace the deprecated construction method of TopScoreDocCollectorManager with the new method ([#18395](https://github.com/opensearch-project/OpenSearch/pull/18395))
80+
- Fixed Approximate Framework regression with Lucene 10.2.1 by updating `intersectRight` BKD walk and `IntRef` visit method ([#18358](https://github.com/opensearch-project/OpenSearch/issues/18358
8081

8182
### Security
8283

server/src/main/java/org/opensearch/search/approximate/ApproximatePointRangeQuery.java

Lines changed: 28 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,6 @@ public void grow(int count) {
162162
@Override
163163
public void visit(int docID) {
164164
// it is possible that size < 1024 and docCount < size but we will continue to count through all the 1024 docs
165-
// and collect less, but it won't hurt performance
166-
if (docCount[0] >= size) {
167-
return;
168-
}
169165
adder.add(docID);
170166
docCount[0]++;
171167
}
@@ -177,9 +173,8 @@ public void visit(DocIdSetIterator iterator) throws IOException {
177173

178174
@Override
179175
public void visit(IntsRef ref) {
180-
for (int i = 0; i < ref.length; i++) {
181-
adder.add(ref.ints[ref.offset + i]);
182-
}
176+
adder.add(ref);
177+
docCount[0] += ref.length;
183178
}
184179

185180
@Override
@@ -248,10 +243,10 @@ private void intersectRight(PointValues.PointTree pointTree, PointValues.Interse
248243
// custom intersect visitor to walk the left of the tree
249244
public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount)
250245
throws IOException {
251-
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
252246
if (docCount[0] >= size) {
253247
return;
254248
}
249+
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
255250
switch (r) {
256251
case CELL_OUTSIDE_QUERY:
257252
// This cell is fully outside the query shape: stop recursing
@@ -293,65 +288,49 @@ public void intersectLeft(PointValues.IntersectVisitor visitor, PointValues.Poin
293288
}
294289
}
295290

296-
// custom intersect visitor to walk the right of tree
291+
// custom intersect visitor to walk the right of tree (from rightmost leaf going left)
297292
public void intersectRight(PointValues.IntersectVisitor visitor, PointValues.PointTree pointTree, long[] docCount)
298293
throws IOException {
299-
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
300294
if (docCount[0] >= size) {
301295
return;
302296
}
297+
PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue());
303298
switch (r) {
304-
case CELL_OUTSIDE_QUERY:
305-
// This cell is fully outside the query shape: stop recursing
306-
break;
307-
308299
case CELL_INSIDE_QUERY:
309-
// If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement
310-
if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) {
311-
intersectRight(visitor, pointTree, docCount);
312-
pointTree.moveToParent();
313-
}
314-
// if point tree size is no longer over, we have to go back one level where it still was over and the intersect left
315-
else if (pointTree.size() <= size && docCount[0] < size) {
316-
pointTree.moveToParent();
317-
intersectLeft(visitor, pointTree, docCount);
318-
}
319-
// if we've reached leaf, it means out size is under the size of the leaf, we can just collect all docIDs
320-
else {
321-
// Leaf node; scan and filter all points in this block:
322-
if (docCount[0] < size) {
323-
pointTree.visitDocIDs(visitor);
324-
}
325-
}
326-
break;
327300
case CELL_CROSSES_QUERY:
328-
// If the cell is fully inside, we keep moving right as long as the point tree size is over our size requirement
329-
if (pointTree.size() > size && docCount[0] < size && moveRight(pointTree)) {
330-
intersectRight(visitor, pointTree, docCount);
331-
pointTree.moveToParent();
332-
}
333-
// if point tree size is no longer over, we have to go back one level where it still was over and the intersect left
334-
else if (pointTree.size() <= size && docCount[0] < size) {
301+
if (pointTree.moveToChild() && docCount[0] < size) {
302+
PointValues.PointTree leftChild = pointTree.clone();
303+
// BKD is binary today, so one moveToSibling() is enough to land on the right child.
304+
// If PointTree ever becomes n-ary, update the traversal below to visit all siblings or re-enable a full loop.
305+
if (pointTree.moveToSibling()) {
306+
// We have two children - visit right first
307+
intersectRight(visitor, pointTree, docCount);
308+
// Then visit left if we still need more docs
309+
if (docCount[0] < size) {
310+
intersectRight(visitor, leftChild, docCount);
311+
}
312+
} else {
313+
// Only one child - visit it
314+
intersectRight(visitor, leftChild, docCount);
315+
}
335316
pointTree.moveToParent();
336-
intersectLeft(visitor, pointTree, docCount);
337-
}
338-
// if we've reached leaf, it means out size is under the size of the leaf, we can just collect all doc values
339-
else {
340-
// Leaf node; scan and filter all points in this block:
317+
} else {
341318
if (docCount[0] < size) {
342-
pointTree.visitDocValues(visitor);
319+
if (r == PointValues.Relation.CELL_INSIDE_QUERY) {
320+
pointTree.visitDocIDs(visitor);
321+
} else {
322+
pointTree.visitDocValues(visitor);
323+
}
343324
}
344325
}
345326
break;
327+
case CELL_OUTSIDE_QUERY:
328+
break;
346329
default:
347330
throw new IllegalArgumentException("Unreachable code");
348331
}
349332
}
350333

351-
public boolean moveRight(PointValues.PointTree pointTree) throws IOException {
352-
return pointTree.moveToChild() && pointTree.moveToSibling();
353-
}
354-
355334
@Override
356335
public ScorerSupplier scorerSupplier(LeafReaderContext context) throws IOException {
357336
LeafReader reader = context.reader();

0 commit comments

Comments
 (0)