Skip to content

Commit 185c8e6

Browse files
committed
iter
1 parent 0691eea commit 185c8e6

File tree

2 files changed

+32
-29
lines changed

2 files changed

+32
-29
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,12 @@ public void seek(int level, int targetOrd) throws IOException {
509509
: Arrays.binarySearch(nodesByLevel[level], 0, nodesByLevel[level].length, targetOrd);
510510
assert targetIndex >= 0
511511
: "seek level=" + level + " target=" + targetOrd + " not found: " + targetIndex;
512+
if (targetIndex < 0) {
513+
arc = -1;
514+
arcUpTo = 0;
515+
arcCount = 0;
516+
return;
517+
}
512518
// unsafe; no bounds checking
513519
dataIn.seek(graphLevelNodeOffsets.get(targetIndex + graphLevelNodeIndexOffsets[level]));
514520
arcCount = dataIn.readVInt();

lucene/core/src/java/org/apache/lucene/util/hnsw/FilteredHnswGraphSearcher.java

Lines changed: 26 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -41,25 +41,21 @@
4141
public class FilteredHnswGraphSearcher extends HnswGraphSearcher {
4242
// The maximum percentage of filtered docs before using this filtered strategy becomes less
4343
// effective than regular HNSW search
44-
static final float MAX_FILTER_THRESHOLD = 0.60f;
44+
static final float MAX_FILTER_THRESHOLD = 1.0f;
4545

4646
// How many filtered candidates must be found to consider N-hop neighbors
4747
private static final float EXPANDED_EXPLORATION_LAMBDA = 0.10f;
4848

49-
private final BitSet explorationVisited;
5049
private final int maxExplorationMultiplier;
50+
private final int minToScore;
5151

5252
/** Creates a new graph searcher. */
5353
private FilteredHnswGraphSearcher(
54-
NeighborQueue candidates,
55-
BitSet explorationVisited,
56-
BitSet visited,
57-
int filterSize,
58-
HnswGraph graph) {
54+
NeighborQueue candidates, BitSet visited, int filterSize, HnswGraph graph) {
5955
super(candidates, visited);
6056
assert graph.maxConn() > 0 : "graph must have known max connections";
61-
this.explorationVisited = explorationVisited;
62-
this.maxExplorationMultiplier = Math.min(graph.size() / filterSize, 8);
57+
this.maxExplorationMultiplier = Math.min(graph.size() / filterSize, graph.maxConn() / 2);
58+
this.minToScore = Math.max(graph.maxConn() / 4, 1);
6359
}
6460

6561
/**
@@ -80,11 +76,7 @@ public static FilteredHnswGraphSearcher create(
8076
throw new IllegalArgumentException("filterSize must be > 0 and < graph size");
8177
}
8278
return new FilteredHnswGraphSearcher(
83-
new NeighborQueue(k, true),
84-
bitSet(filterSize, getGraphSize(graph), k),
85-
new SparseFixedBitSet(getGraphSize(graph)),
86-
filterSize,
87-
graph);
79+
new NeighborQueue(k, true), bitSet(filterSize, getGraphSize(graph), k), filterSize, graph);
8880
}
8981

9082
private static BitSet bitSet(long filterSize, int graphSize, int topk) {
@@ -164,22 +156,33 @@ void searchLevel(
164156
float filteredAmount = toExplore.count() / (float) neighborCount;
165157
int maxToScoreCount =
166158
(int) (neighborCount * Math.min(maxExplorationMultiplier, 1f / (1f - filteredAmount)));
159+
int maxAdditionalToExploreCount = toExplore.capacity() - 1;
167160
// Enough neighbors were filtered out and we still have too few candidates to score, so expand the exploration
168-
if (toScore.count() < maxToScoreCount && filteredAmount > EXPANDED_EXPLORATION_LAMBDA) {
161+
int totalExplored = toScore.count() + toExplore.count();
162+
if (toScore.count() < maxToScoreCount
163+
&& filteredAmount > EXPANDED_EXPLORATION_LAMBDA
164+
&& totalExplored < maxAdditionalToExploreCount) {
169165
// Now we need to explore the neighbors of the neighbors
170166
int exploreFriend;
171167
while ((exploreFriend = toExplore.poll()) != NO_MORE_DOCS
168+
// only explore initial additional neighborhood
169+
&& totalExplored < maxAdditionalToExploreCount
172170
&& toScore.count() < maxToScoreCount) {
173171
graphSeek(graph, level, exploreFriend);
174172
int friendOfAFriendOrd;
175173
while ((friendOfAFriendOrd = graph.nextNeighbor()) != NO_MORE_DOCS
176174
&& toScore.count() < maxToScoreCount) {
177-
if (visited.get(friendOfAFriendOrd)
178-
|| explorationVisited.getAndSet(friendOfAFriendOrd)) {
175+
if (visited.getAndSet(friendOfAFriendOrd)) {
179176
continue;
180177
}
178+
totalExplored++;
181179
if (acceptOrds.get(friendOfAFriendOrd)) {
182180
toScore.add(friendOfAFriendOrd);
181+
// If we have YET to find a minimum number of candidates, we will continue to explore
182+
// until our max
183+
} else if (totalExplored < maxAdditionalToExploreCount
184+
&& toScore.count() < minToScore) {
185+
toExplore.add(friendOfAFriendOrd);
183186
}
184187
}
185188
}
@@ -202,7 +205,6 @@ void searchLevel(
202205
private void prepareScratchState() {
203206
candidates.clear();
204207
visited.clear();
205-
explorationVisited.clear();
206208
}
207209

208210
private static class IntArrayQueue {
@@ -214,22 +216,17 @@ private static class IntArrayQueue {
214216
nodes = new int[capacity];
215217
}
216218

217-
int count() {
218-
return size - upto;
219+
int capacity() {
220+
return nodes.length;
219221
}
220222

221-
void expand(int capacity) {
222-
if (nodes.length < capacity) {
223-
int[] newNodes = new int[capacity];
224-
System.arraycopy(nodes, 0, newNodes, 0, size);
225-
nodes = newNodes;
226-
}
223+
int count() {
224+
return size - upto;
227225
}
228226

229227
void add(int node) {
230-
assert isFull() == false;
231-
if (size == nodes.length) {
232-
expand(size * 2);
228+
if (isFull()) {
229+
throw new UnsupportedOperationException("Initial capacity should remain unchanged");
233230
}
234231
nodes[size++] = node;
235232
}

0 commit comments

Comments
 (0)