Skip to content

Commit 40d7460

Browse files
authored
Clean up FileTypeHint a bit. (#14635)
- Add more javadocs to guide usage of `INDEX` vs. `DATA`.
- Remove the `METADATA` constant, as metadata files should be opened with `Directory#openChecksumInput`, which doesn't take hints.
- Configure `FileTypeHint` on more files of the default codec.
- Remove checks on `FileTypeHint` from `toReadAdvice`: the default impl should only look at `DataAccessHint` to determine the appropriate read advice.
1 parent 0412661 commit 40d7460

File tree

7 files changed

+21
-29
lines changed

7 files changed

+21
-29
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java

+2-2
Original file line numberDiff line numberDiff line change
@@ -164,7 +164,7 @@ public Lucene103PostingsReader(SegmentReadState state) throws IOException {
164164
String proxName =
165165
IndexFileNames.segmentFileName(
166166
state.segmentInfo.name, state.segmentSuffix, Lucene103PostingsFormat.POS_EXTENSION);
167-
posIn = state.directory.openInput(proxName, state.context);
167+
posIn = state.directory.openInput(proxName, state.context.withHints(FileTypeHint.DATA));
168168
CodecUtil.checkIndexHeader(
169169
posIn, POS_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
170170
CodecUtil.retrieveChecksum(posIn, expectedPosFileLength);
@@ -175,7 +175,7 @@ public Lucene103PostingsReader(SegmentReadState state) throws IOException {
175175
state.segmentInfo.name,
176176
state.segmentSuffix,
177177
Lucene103PostingsFormat.PAY_EXTENSION);
178-
payIn = state.directory.openInput(payName, state.context);
178+
payIn = state.directory.openInput(payName, state.context.withHints(FileTypeHint.DATA));
179179
CodecUtil.checkIndexHeader(
180180
payIn, PAY_CODEC, version, version, state.segmentInfo.getId(), state.segmentSuffix);
181181
CodecUtil.retrieveChecksum(payIn, expectedPayFileLength);

lucene/core/src/java/org/apache/lucene/codecs/lucene103/blocktree/Lucene103BlockTreeTermsReader.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ public Lucene103BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentR
114114
try {
115115
String termsName =
116116
IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
117-
termsIn = state.directory.openInput(termsName, state.context);
117+
termsIn = state.directory.openInput(termsName, state.context.withHints(FileTypeHint.DATA));
118118
version =
119119
CodecUtil.checkIndexHeader(
120120
termsIn,

lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingTermVectorsReader.java

+1-5
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,6 @@
5858
import org.apache.lucene.store.ChecksumIndexInput;
5959
import org.apache.lucene.store.DataAccessHint;
6060
import org.apache.lucene.store.Directory;
61-
import org.apache.lucene.store.FileDataHint;
6261
import org.apache.lucene.store.FileTypeHint;
6362
import org.apache.lucene.store.IOContext;
6463
import org.apache.lucene.store.IndexInput;
@@ -149,10 +148,7 @@ public Lucene90CompressingTermVectorsReader(
149148
final String vectorsStreamFN =
150149
IndexFileNames.segmentFileName(segment, segmentSuffix, VECTORS_EXTENSION);
151150
vectorsStream =
152-
d.openInput(
153-
vectorsStreamFN,
154-
context.withHints(
155-
FileTypeHint.DATA, FileDataHint.KNN_VECTORS, DataAccessHint.RANDOM));
151+
d.openInput(vectorsStreamFN, context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM));
156152
version =
157153
CodecUtil.checkIndexHeader(
158154
vectorsStream, formatName, VERSION_START, VERSION_CURRENT, si.getId(), segmentSuffix);

lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99HnswVectorsReader.java

+2
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,8 @@ public Lucene99HnswVectorsReader(SegmentReadState state, FlatVectorsReader flatV
111111
Lucene99HnswVectorsFormat.VECTOR_INDEX_EXTENSION,
112112
Lucene99HnswVectorsFormat.VECTOR_INDEX_CODEC_NAME,
113113
state.context.withHints(
114+
// Even though this input is referred to as `indexIn`, it doesn't qualify as
115+
// FileTypeHint#INDEX since it's a large file
114116
FileTypeHint.DATA,
115117
FileDataHint.KNN_VECTORS,
116118
DataAccessHint.RANDOM,

lucene/core/src/java/org/apache/lucene/store/FileTypeHint.java

+13-7
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,18 @@
1616
*/
1717
package org.apache.lucene.store;
1818

19-
/** Hints on the type of file being opened */
19+
/**
20+
* Hints on the type of file being opened.
21+
*
22+
* <p><b>NOTE</b>: There is no constant for metadata files, since metadata files should be opened
23+
* with {@link Directory#openChecksumInput(String)}, which doesn't take hints.
24+
*/
2025
public enum FileTypeHint implements IOContext.FileOpenHint {
21-
/** The file contains metadata */
22-
METADATA,
23-
/** The file contains field data */
24-
DATA,
25-
/** The file contains indexes */
26-
INDEX
26+
/**
27+
* The file contains indexes. It is small (~1% or less of the data size) and generally fits in the
28+
* page cache.
29+
*/
30+
INDEX,
31+
/** The file contains field data. */
32+
DATA
2733
}

lucene/core/src/java/org/apache/lucene/store/MMapDirectory.java

-5
Original file line numberDiff line numberDiff line change
@@ -298,11 +298,6 @@ private static ReadAdvice toReadAdvice(IOContext context) {
298298
return ReadAdvice.SEQUENTIAL;
299299
}
300300

301-
if (context.hints().contains(FileTypeHint.DATA)
302-
|| context.hints().contains(FileTypeHint.INDEX)) {
303-
return ReadAdvice.NORMAL;
304-
}
305-
306301
return Constants.DEFAULT_READADVICE;
307302
}
308303

lucene/test-framework/src/java/org/apache/lucene/tests/store/SerialIOCountingDirectory.java

+2-9
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,10 @@ public ChecksumIndexInput openChecksumInput(String name) throws IOException {
7070
return super.openChecksumInput(name);
7171
}
7272

73-
private static boolean defaultDataAccess(IOContext context) {
74-
// Data or index file type, and no data access hints
75-
return (context.hints().contains(FileTypeHint.DATA)
76-
|| context.hints().contains(FileTypeHint.INDEX))
77-
&& context.hints(DataAccessHint.class).findAny().isEmpty();
78-
}
79-
8073
@Override
8174
public IndexInput openInput(String name, IOContext context) throws IOException {
82-
if (defaultDataAccess(context)) {
83-
// expected to be loaded in memory, only count 1 at open time
75+
if (context.hints().contains(FileTypeHint.INDEX)) {
76+
// expected to be small and fit in the page cache, only count 1 at open time
8477
counter.increment();
8578
return super.openInput(name, context);
8679
}

0 commit comments

Comments
 (0)