Skip to content

Commit 9936b20

Browse files
authored
Update the IOContext rather than the ReadAdvice on IndexInput (#14702)
This ensures the IOContext is set on IndexInput for merges, rather than the raw ReadAdvice. This is because ReadAdvice should be an internal implementation detail of MMapDirectory, not part of IndexInput API.
1 parent 685049f commit 9936b20

File tree

10 files changed

+59
-78
lines changed

10 files changed

+59
-78
lines changed

lucene/core/src/java/org/apache/lucene/codecs/KnnVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,7 +124,7 @@ public abstract void search(
124124
*
125125
* <p>The default implementation returns {@code this}
126126
*/
127-
public KnnVectorsReader getMergeInstance() {
127+
public KnnVectorsReader getMergeInstance() throws IOException {
128128
return this;
129129
}
130130

lucene/core/src/java/org/apache/lucene/codecs/hnsw/FlatVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public abstract RandomVectorScorer getRandomVectorScorer(String field, byte[] ta
9696
* <p>The default implementation returns {@code this}
9797
*/
9898
@Override
99-
public FlatVectorsReader getMergeInstance() {
99+
public FlatVectorsReader getMergeInstance() throws IOException {
100100
return this;
101101
}
102102
}

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99FlatVectorsReader.java

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding;
2222

2323
import java.io.IOException;
24-
import java.io.UncheckedIOException;
2524
import java.util.Map;
2625
import org.apache.lucene.codecs.CodecUtil;
2726
import org.apache.lucene.codecs.hnsw.FlatVectorsReader;
@@ -45,7 +44,6 @@
4544
import org.apache.lucene.store.FileTypeHint;
4645
import org.apache.lucene.store.IOContext;
4746
import org.apache.lucene.store.IndexInput;
48-
import org.apache.lucene.store.ReadAdvice;
4947
import org.apache.lucene.util.IOUtils;
5048
import org.apache.lucene.util.RamUsageEstimator;
5149
import org.apache.lucene.util.hnsw.RandomVectorScorer;
@@ -63,23 +61,25 @@ public final class Lucene99FlatVectorsReader extends FlatVectorsReader {
6361
private final IntObjectHashMap<FieldEntry> fields = new IntObjectHashMap<>();
6462
private final IndexInput vectorData;
6563
private final FieldInfos fieldInfos;
64+
private final IOContext dataContext;
6665

6766
public Lucene99FlatVectorsReader(SegmentReadState state, FlatVectorsScorer scorer)
6867
throws IOException {
6968
super(scorer);
7069
int versionMeta = readMetadata(state);
7170
this.fieldInfos = state.fieldInfos;
71+
// Flat formats are used to randomly access vectors from their node ID that is stored
72+
// in the HNSW graph.
73+
dataContext =
74+
state.context.withHints(FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM);
7275
try {
7376
vectorData =
7477
openDataInput(
7578
state,
7679
versionMeta,
7780
Lucene99FlatVectorsFormat.VECTOR_DATA_EXTENSION,
7881
Lucene99FlatVectorsFormat.VECTOR_DATA_CODEC_NAME,
79-
// Flat formats are used to randomly access vectors from their node ID that is stored
80-
// in the HNSW graph.
81-
state.context.withHints(
82-
FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM));
82+
dataContext);
8383
} catch (Throwable t) {
8484
IOUtils.closeWhileSuppressingExceptions(t, this);
8585
throw t;
@@ -177,14 +177,10 @@ public void checkIntegrity() throws IOException {
177177
}
178178

179179
@Override
180-
public FlatVectorsReader getMergeInstance() {
181-
try {
182-
// Update the read advice since vectors are guaranteed to be accessed sequentially for merge
183-
this.vectorData.updateReadAdvice(ReadAdvice.SEQUENTIAL);
184-
return this;
185-
} catch (IOException exception) {
186-
throw new UncheckedIOException(exception);
187-
}
180+
public FlatVectorsReader getMergeInstance() throws IOException {
181+
// Update the read advice since vectors are guaranteed to be accessed sequentially for merge
182+
vectorData.updateIOContext(dataContext.withHints(DataAccessHint.SEQUENTIAL));
183+
return this;
188184
}
189185

190186
private FieldEntry getFieldEntryOrThrow(String field) {
@@ -276,7 +272,7 @@ public RandomVectorScorer getRandomVectorScorer(String field, byte[] target) thr
276272
public void finishMerge() throws IOException {
277273
// This makes sure that the access pattern hint is reverted back since HNSW implementation
278274
// needs it
279-
this.vectorData.updateReadAdvice(ReadAdvice.RANDOM);
275+
vectorData.updateIOContext(dataContext);
280276
}
281277

282278
@Override

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ private Lucene99HnswVectorsReader(
131131
}
132132

133133
@Override
134-
public KnnVectorsReader getMergeInstance() {
134+
public KnnVectorsReader getMergeInstance() throws IOException {
135135
return new Lucene99HnswVectorsReader(this, this.flatVectorsReader.getMergeInstance());
136136
}
137137

lucene/core/src/java/org/apache/lucene/codecs/perfield/PerFieldKnnVectorsFormat.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ public FieldsReader(final SegmentReadState readState) throws IOException {
238238
}
239239
}
240240

241-
private FieldsReader(final FieldsReader fieldsReader) {
241+
private FieldsReader(final FieldsReader fieldsReader) throws IOException {
242242
this.fieldInfos = fieldsReader.fieldInfos;
243243
for (FieldInfo fi : this.fieldInfos) {
244244
if (fi.hasVectorValues() && fieldsReader.fields.containsKey(fi.number)) {
@@ -248,7 +248,7 @@ private FieldsReader(final FieldsReader fieldsReader) {
248248
}
249249

250250
@Override
251-
public KnnVectorsReader getMergeInstance() {
251+
public KnnVectorsReader getMergeInstance() throws IOException {
252252
return new FieldsReader(this);
253253
}
254254

lucene/core/src/java/org/apache/lucene/store/IndexInput.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -227,12 +227,12 @@ public String toString() {
227227
public void prefetch(long offset, long length) throws IOException {}
228228

229229
/**
230-
* Optional method: Give a hint to this input about the change in read access pattern. IndexInput
230+
* Optional method: Updates the {@code IOContext} to specify a new read access pattern. IndexInput
231231
* implementations may take advantage of this hint to optimize reads from storage.
232232
*
233233
* <p>The default implementation is a no-op.
234234
*/
235-
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {}
235+
public void updateIOContext(IOContext context) throws IOException {}
236236

237237
/**
238238
* Returns a hint whether all the contents of this input are resident in physical memory. It's a

lucene/core/src/java/org/apache/lucene/store/MemorySegmentIndexInput.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,11 @@ public void prefetch(long offset, long length) throws IOException {
357357
}
358358

359359
@Override
360-
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
360+
public void updateIOContext(IOContext context) throws IOException {
361+
updateReadAdvice(toReadAdvice.apply(context));
362+
}
363+
364+
private void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
361365
if (NATIVE_ACCESS.isEmpty()) {
362366
return;
363367
}

lucene/test-framework/src/java/org/apache/lucene/tests/codecs/asserting/AssertingKnnVectorsFormat.java

Lines changed: 34 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -113,16 +113,17 @@ public long ramBytesUsed() {
113113
public static class AssertingKnnVectorsReader extends KnnVectorsReader
114114
implements HnswGraphProvider {
115115
public final KnnVectorsReader delegate;
116-
final FieldInfos fis;
117-
final boolean mergeInstance;
118-
AtomicInteger mergeInstanceCount = new AtomicInteger();
119-
AtomicInteger finishMergeCount = new AtomicInteger();
116+
private final FieldInfos fis;
117+
private final boolean mergeInstance;
118+
private final AtomicInteger mergeInstanceCount = new AtomicInteger();
119+
private final AtomicInteger finishMergeCount = new AtomicInteger();
120120

121-
AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
121+
private AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis) {
122122
this(delegate, fis, false);
123123
}
124124

125-
AssertingKnnVectorsReader(KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) {
125+
private AssertingKnnVectorsReader(
126+
KnnVectorsReader delegate, FieldInfos fis, boolean mergeInstance) {
126127
assert delegate != null;
127128
this.delegate = delegate;
128129
this.fis = fis;
@@ -136,6 +137,8 @@ public void checkIntegrity() throws IOException {
136137

137138
@Override
138139
public FloatVectorValues getFloatVectorValues(String field) throws IOException {
140+
assert mergeInstanceCount.get() == finishMergeCount.get() || mergeInstance
141+
: "Called on the wrong instance";
139142
FieldInfo fi = fis.fieldInfo(field);
140143
assert fi != null
141144
&& fi.getVectorDimension() > 0
@@ -150,6 +153,8 @@ public FloatVectorValues getFloatVectorValues(String field) throws IOException {
150153

151154
@Override
152155
public ByteVectorValues getByteVectorValues(String field) throws IOException {
156+
assert mergeInstanceCount.get() == finishMergeCount.get() || mergeInstance
157+
: "Called on the wrong instance";
153158
FieldInfo fi = fis.fieldInfo(field);
154159
assert fi != null
155160
&& fi.getVectorDimension() > 0
@@ -165,7 +170,7 @@ public ByteVectorValues getByteVectorValues(String field) throws IOException {
165170
@Override
166171
public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs)
167172
throws IOException {
168-
assert !mergeInstance;
173+
assert mergeInstanceCount.get() == finishMergeCount.get() : "There is an open merge instance";
169174
FieldInfo fi = fis.fieldInfo(field);
170175
assert fi != null
171176
&& fi.getVectorDimension() > 0
@@ -176,7 +181,7 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits
176181
@Override
177182
public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs)
178183
throws IOException {
179-
assert !mergeInstance;
184+
assert mergeInstanceCount.get() == finishMergeCount.get() : "There is an open merge instance";
180185
FieldInfo fi = fis.fieldInfo(field);
181186
assert fi != null
182187
&& fi.getVectorDimension() > 0
@@ -185,15 +190,28 @@ public void search(String field, byte[] target, KnnCollector knnCollector, Bits
185190
}
186191

187192
@Override
188-
public KnnVectorsReader getMergeInstance() {
189-
assert !mergeInstance;
193+
public KnnVectorsReader getMergeInstance() throws IOException {
190194
var mergeVectorsReader = delegate.getMergeInstance();
191195
assert mergeVectorsReader != null;
192196
mergeInstanceCount.incrementAndGet();
197+
AtomicInteger parentMergeFinishCount = this.finishMergeCount;
193198

194-
final var parent = this;
195199
return new AssertingKnnVectorsReader(
196200
mergeVectorsReader, AssertingKnnVectorsReader.this.fis, true) {
201+
private boolean finished;
202+
203+
@Override
204+
public void search(
205+
String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) {
206+
assert false : "This instance should only be used for merging";
207+
}
208+
209+
@Override
210+
public void search(
211+
String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) {
212+
assert false : "This instance should only be used for merging";
213+
}
214+
197215
@Override
198216
public KnnVectorsReader getMergeInstance() {
199217
assert false; // merging from a merge instance it not allowed
@@ -202,9 +220,10 @@ public KnnVectorsReader getMergeInstance() {
202220

203221
@Override
204222
public void finishMerge() throws IOException {
205-
assert mergeInstance;
223+
assert !finished : "Merging already finished";
224+
finished = true;
206225
delegate.finishMerge();
207-
parent.finishMergeCount.incrementAndGet();
226+
parentMergeFinishCount.incrementAndGet();
208227
}
209228

210229
@Override
@@ -216,9 +235,7 @@ public void close() {
216235

217236
@Override
218237
public void finishMerge() throws IOException {
219-
assert mergeInstance;
220-
delegate.finishMerge();
221-
finishMergeCount.incrementAndGet();
238+
assert false; // can only finish merge on the merge instance
222239
}
223240

224241
@Override
@@ -228,10 +245,8 @@ public Map<String, Long> getOffHeapByteSize(FieldInfo fieldInfo) {
228245

229246
@Override
230247
public void close() throws IOException {
231-
assert !mergeInstance;
232-
delegate.close();
248+
assert mergeInstanceCount.get() == finishMergeCount.get();
233249
delegate.close();
234-
assert finishMergeCount.get() <= 0 || mergeInstanceCount.get() == finishMergeCount.get();
235250
}
236251

237252
@Override

lucene/test-framework/src/java/org/apache/lucene/tests/store/BaseDirectoryTestCase.java

Lines changed: 0 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,6 @@
6161
import org.apache.lucene.store.IndexOutput;
6262
import org.apache.lucene.store.MMapDirectory;
6363
import org.apache.lucene.store.RandomAccessInput;
64-
import org.apache.lucene.store.ReadAdvice;
6564
import org.apache.lucene.tests.mockfile.ExtrasFS;
6665
import org.apache.lucene.tests.util.LuceneTestCase;
6766
import org.apache.lucene.tests.util.TestUtil;
@@ -1571,38 +1570,6 @@ public void testPrefetchOnSlice() throws IOException {
15711570
doTestPrefetch(TestUtil.nextInt(random(), 1, 1024));
15721571
}
15731572

1574-
public void testUpdateReadAdvice() throws IOException {
1575-
try (Directory dir = getDirectory(createTempDir("testUpdateReadAdvice"))) {
1576-
final int totalLength = TestUtil.nextInt(random(), 16384, 65536);
1577-
byte[] arr = new byte[totalLength];
1578-
random().nextBytes(arr);
1579-
try (IndexOutput out = dir.createOutput("temp.bin", IOContext.DEFAULT)) {
1580-
out.writeBytes(arr, arr.length);
1581-
}
1582-
1583-
try (IndexInput orig = dir.openInput("temp.bin", IOContext.DEFAULT)) {
1584-
IndexInput in = random().nextBoolean() ? orig.clone() : orig;
1585-
// Read advice updated at start
1586-
in.updateReadAdvice(randomFrom(random(), ReadAdvice.values()));
1587-
for (int i = 0; i < totalLength; i++) {
1588-
int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1);
1589-
in.seek(offset);
1590-
assertEquals(arr[offset], in.readByte());
1591-
}
1592-
1593-
// Updating readAdvice in the middle
1594-
for (int i = 0; i < 10_000; ++i) {
1595-
int offset = TestUtil.nextInt(random(), 0, (int) in.length() - 1);
1596-
in.seek(offset);
1597-
assertEquals(arr[offset], in.readByte());
1598-
if (random().nextBoolean()) {
1599-
in.updateReadAdvice(randomFrom(random(), ReadAdvice.values()));
1600-
}
1601-
}
1602-
}
1603-
}
1604-
}
1605-
16061573
private void doTestPrefetch(int startOffset) throws IOException {
16071574
try (Directory dir = getDirectory(createTempDir())) {
16081575
final int totalLength = startOffset + TestUtil.nextInt(random(), 16384, 65536);

lucene/test-framework/src/java/org/apache/lucene/tests/store/MockIndexInputWrapper.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
import org.apache.lucene.store.FilterIndexInput;
2626
import org.apache.lucene.store.IOContext;
2727
import org.apache.lucene.store.IndexInput;
28-
import org.apache.lucene.store.ReadAdvice;
2928

3029
/**
3130
* Used by MockDirectoryWrapper to create an input stream that keeps track of when it's been closed.
@@ -186,10 +185,10 @@ public Optional<Boolean> isLoaded() {
186185
}
187186

188187
@Override
189-
public void updateReadAdvice(ReadAdvice readAdvice) throws IOException {
188+
public void updateIOContext(IOContext context) throws IOException {
190189
ensureOpen();
191190
ensureAccessible();
192-
in.updateReadAdvice(readAdvice);
191+
in.updateIOContext(context);
193192
}
194193

195194
@Override

0 commit comments

Comments
 (0)