Skip to content

Commit 9ca6ca0

Browse files
committed
Completion FSTs to be loaded off-heap at all times (#14364)
All the existing completion postings format load their FSTs on-heap. It is possible to customize that behaviour by mainintaing a custom postings format that override the fst load mode. TestSuggestField attempted to test all modes, but in practice, it can only test the default load mode because the read path relies on the default constructor called via SPI, that does not allow providing the fst load mode. This commit switches loading of FST to off-heap, and removes the fst load mode argument from all completion postings format classes, effectively making the loading always off-heap.
1 parent 061292b commit 9ca6ca0

13 files changed

+28
-143
lines changed

lucene/CHANGES.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,7 @@ Improvements
100100

101101
* GITHUB#14213: Allowing indexing stored-only StoredField directly from DataInput. (Tim Brooks)
102102

103+
* GITHUB#14364: Completion FSTs to be loaded off-heap at all times. (Luca Cavanna)
103104

104105
Optimizations
105106
---------------------

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion101PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,17 +25,9 @@
2525
* @lucene.experimental
2626
*/
2727
public class Completion101PostingsFormat extends CompletionPostingsFormat {
28-
/** Creates a {@link Completion101PostingsFormat} that will load the completion FST on-heap. */
28+
/** Creates a {@link Completion101PostingsFormat}. */
2929
public Completion101PostingsFormat() {
30-
this(FSTLoadMode.ON_HEAP);
31-
}
32-
33-
/**
34-
* Creates a {@link Completion101PostingsFormat} that will use the provided <code>fstLoadMode
35-
* </code> to determine if the completion FST should be loaded on or off heap.
36-
*/
37-
public Completion101PostingsFormat(FSTLoadMode fstLoadMode) {
38-
super("Completion101", fstLoadMode);
30+
super("Completion101");
3931
}
4032

4133
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion50PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,9 @@
2727
* @lucene.experimental
2828
*/
2929
public class Completion50PostingsFormat extends CompletionPostingsFormat {
30-
/** Creates a {@link Completion50PostingsFormat} that will load the completion FST on-heap. */
30+
/** Creates a {@link Completion50PostingsFormat}. */
3131
public Completion50PostingsFormat() {
32-
this(FSTLoadMode.ON_HEAP);
33-
}
34-
35-
/**
36-
* Creates a {@link Completion50PostingsFormat} that will use the provided <code>fstLoadMode
37-
* </code> to determine if the completion FST should be loaded on or off heap.
38-
*/
39-
public Completion50PostingsFormat(FSTLoadMode fstLoadMode) {
40-
super("completion", fstLoadMode);
32+
super("completion");
4133
}
4234

4335
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion84PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,9 @@
2727
* @lucene.experimental
2828
*/
2929
public class Completion84PostingsFormat extends CompletionPostingsFormat {
30-
/** Creates a {@link Completion84PostingsFormat} that will load the completion FST on-heap. */
30+
/** Creates a {@link Completion84PostingsFormat}. */
3131
public Completion84PostingsFormat() {
32-
this(FSTLoadMode.ON_HEAP);
33-
}
34-
35-
/**
36-
* Creates a {@link Completion84PostingsFormat} that will use the provided <code>fstLoadMode
37-
* </code> to determine if the completion FST should be loaded on or off heap.
38-
*/
39-
public Completion84PostingsFormat(FSTLoadMode fstLoadMode) {
40-
super("Completion84", fstLoadMode);
32+
super("Completion84");
4133
}
4234

4335
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion90PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,9 @@
2727
* @lucene.experimental
2828
*/
2929
public class Completion90PostingsFormat extends CompletionPostingsFormat {
30-
/** Creates a {@link Completion90PostingsFormat} that will load the completion FST on-heap. */
30+
/** Creates a {@link Completion90PostingsFormat}. */
3131
public Completion90PostingsFormat() {
32-
this(FSTLoadMode.ON_HEAP);
33-
}
34-
35-
/**
36-
* Creates a {@link Completion90PostingsFormat} that will use the provided <code>fstLoadMode
37-
* </code> to determine if the completion FST should be loaded on or off heap.
38-
*/
39-
public Completion90PostingsFormat(FSTLoadMode fstLoadMode) {
40-
super("Completion90", fstLoadMode);
32+
super("Completion90");
4133
}
4234

4335
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion912PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,9 @@
2727
* @lucene.experimental
2828
*/
2929
public class Completion912PostingsFormat extends CompletionPostingsFormat {
30-
/** Creates a {@link Completion912PostingsFormat} that will load the completion FST on-heap. */
30+
/** Creates a {@link Completion912PostingsFormat}. */
3131
public Completion912PostingsFormat() {
32-
this(FSTLoadMode.ON_HEAP);
33-
}
34-
35-
/**
36-
* Creates a {@link Completion912PostingsFormat} that will use the provided <code>fstLoadMode
37-
* </code> to determine if the completion FST should be loaded on or off heap.
38-
*/
39-
public Completion912PostingsFormat(FSTLoadMode fstLoadMode) {
40-
super("Completion912", fstLoadMode);
32+
super("Completion912");
4133
}
4234

4335
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/Completion99PostingsFormat.java

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -27,17 +27,9 @@
2727
* @lucene.experimental
2828
*/
2929
public class Completion99PostingsFormat extends CompletionPostingsFormat {
30-
/** Creates a {@link Completion99PostingsFormat} that will load the completion FST on-heap. */
30+
/** Creates a {@link Completion99PostingsFormat}. */
3131
public Completion99PostingsFormat() {
32-
this(FSTLoadMode.ON_HEAP);
33-
}
34-
35-
/**
36-
* Creates a {@link Completion99PostingsFormat} that will use the provided <code>fstLoadMode
37-
* </code> to determine if the completion FST should be loaded on or off heap.
38-
*/
39-
public Completion99PostingsFormat(FSTLoadMode fstLoadMode) {
40-
super("Completion99", fstLoadMode);
32+
super("Completion99");
4133
}
4234

4335
@Override

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionFieldsProducer.java

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
import org.apache.lucene.index.IndexFileNames;
3636
import org.apache.lucene.index.SegmentReadState;
3737
import org.apache.lucene.index.Terms;
38-
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat.FSTLoadMode;
3938
import org.apache.lucene.store.ChecksumIndexInput;
4039
import org.apache.lucene.store.IndexInput;
4140
import org.apache.lucene.util.Accountable;
@@ -56,7 +55,7 @@
5655
final class CompletionFieldsProducer extends FieldsProducer implements Accountable {
5756

5857
private FieldsProducer delegateFieldsProducer;
59-
private Map<String, CompletionsTermsReader> readers;
58+
private final Map<String, CompletionsTermsReader> readers;
6059
private IndexInput dictIn;
6160

6261
// copy ctr for merge instance
@@ -66,8 +65,7 @@ private CompletionFieldsProducer(
6665
this.readers = readers;
6766
}
6867

69-
CompletionFieldsProducer(String codecName, SegmentReadState state, FSTLoadMode fstLoadMode)
70-
throws IOException {
68+
CompletionFieldsProducer(String codecName, SegmentReadState state) throws IOException {
7169
String indexFile =
7270
IndexFileNames.segmentFileName(
7371
state.segmentInfo.name, state.segmentSuffix, INDEX_EXTENSION);
@@ -114,8 +112,7 @@ private CompletionFieldsProducer(
114112
FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNumber);
115113
// we don't load the FST yet
116114
readers.put(
117-
fieldInfo.name,
118-
new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type, fstLoadMode));
115+
fieldInfo.name, new CompletionsTermsReader(dictIn, offset, minWeight, maxWeight, type));
119116
}
120117
CodecUtil.checkFooter(index);
121118
success = true;

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionPostingsFormat.java

Lines changed: 2 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -104,36 +104,9 @@ public abstract class CompletionPostingsFormat extends PostingsFormat {
104104
static final String INDEX_EXTENSION = "cmp";
105105
static final String DICT_EXTENSION = "lkp";
106106

107-
/** An enum that allows to control if suggester FSTs are loaded into memory or read off-heap */
108-
public enum FSTLoadMode {
109-
/**
110-
* Always read FSTs from disk. NOTE: If this option is used the FST will be read off-heap even
111-
* if buffered directory implementations are used.
112-
*/
113-
OFF_HEAP,
114-
/** Never read FSTs from disk ie. all suggest fields FSTs are loaded into memory */
115-
ON_HEAP,
116-
/**
117-
* Automatically make the decision if FSTs are read from disk depending if the segment read from
118-
* an MMAPDirectory
119-
*/
120-
AUTO
121-
}
122-
123-
private final FSTLoadMode fstLoadMode;
124-
125-
/** Used only by core Lucene at read-time via Service Provider instantiation */
107+
/** Creates a {@link CompletionPostingsFormat} with the given name. */
126108
public CompletionPostingsFormat(String name) {
127-
this(name, FSTLoadMode.ON_HEAP);
128-
}
129-
130-
/**
131-
* Creates a {@link CompletionPostingsFormat} that will use the provided <code>fstLoadMode</code>
132-
* to determine if the completion FST should be loaded on or off heap.
133-
*/
134-
public CompletionPostingsFormat(String name, FSTLoadMode fstLoadMode) {
135109
super(name);
136-
this.fstLoadMode = fstLoadMode;
137110
}
138111

139112
/** Concrete implementation should specify the delegating postings format */
@@ -153,6 +126,6 @@ public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException
153126

154127
@Override
155128
public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
156-
return new CompletionFieldsProducer(getName(), state, fstLoadMode);
129+
return new CompletionFieldsProducer(getName(), state);
157130
}
158131
}

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/CompletionsTermsReader.java

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
import java.io.IOException;
2020
import java.util.Collection;
2121
import java.util.Collections;
22-
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat.FSTLoadMode;
2322
import org.apache.lucene.store.IndexInput;
2423
import org.apache.lucene.util.Accountable;
2524

@@ -41,29 +40,21 @@ public final class CompletionsTermsReader implements Accountable {
4140
private final IndexInput dictIn;
4241
private final long offset;
4342

44-
private final FSTLoadMode fstLoadMode;
45-
4643
private NRTSuggester suggester;
4744

4845
/**
4946
* Creates a CompletionTermsReader to load a field-specific suggester from the index <code>dictIn
5047
* </code> with <code>offset</code>
5148
*/
5249
CompletionsTermsReader(
53-
IndexInput dictIn,
54-
long offset,
55-
long minWeight,
56-
long maxWeight,
57-
byte type,
58-
FSTLoadMode fstLoadMode) {
50+
IndexInput dictIn, long offset, long minWeight, long maxWeight, byte type) {
5951
assert minWeight <= maxWeight;
6052
assert offset >= 0l && offset < dictIn.length();
6153
this.dictIn = dictIn;
6254
this.offset = offset;
6355
this.minWeight = minWeight;
6456
this.maxWeight = maxWeight;
6557
this.type = type;
66-
this.fstLoadMode = fstLoadMode;
6758
}
6859

6960
/**
@@ -73,7 +64,7 @@ public final class CompletionsTermsReader implements Accountable {
7364
public synchronized NRTSuggester suggester() throws IOException {
7465
if (suggester == null) {
7566
IndexInput indexInput = dictIn.slice("NRTSuggester", offset, dictIn.length() - offset);
76-
suggester = NRTSuggester.load(indexInput, fstLoadMode);
67+
suggester = NRTSuggester.load(indexInput);
7768
}
7869
return suggester;
7970
}

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggester.java

Lines changed: 6 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Comparator;
2525
import java.util.List;
2626
import org.apache.lucene.search.suggest.analyzing.FSTUtil;
27-
import org.apache.lucene.search.suggest.document.CompletionPostingsFormat.FSTLoadMode;
2827
import org.apache.lucene.store.ByteArrayDataInput;
2928
import org.apache.lucene.store.ByteArrayDataOutput;
3029
import org.apache.lucene.store.IndexInput;
@@ -306,37 +305,15 @@ private static double calculateLiveDocRatio(int numDocs, int maxDocs) {
306305
return (numDocs > 0) ? ((double) numDocs / maxDocs) : -1;
307306
}
308307

309-
private static boolean shouldLoadFSTOffHeap(IndexInput input, FSTLoadMode fstLoadMode) {
310-
switch (fstLoadMode) {
311-
case ON_HEAP:
312-
return false;
313-
case OFF_HEAP:
314-
return true;
315-
case AUTO:
316-
// TODO: Make this less hacky to maybe expose "off-heap" feature using a marker interface on
317-
// the IndexInput
318-
return input.getClass().getName().contains(".MemorySegmentIndexInput");
319-
default:
320-
throw new IllegalStateException("unknown enum constant: " + fstLoadMode);
321-
}
322-
}
323-
324-
/**
325-
* Loads a {@link NRTSuggester} from {@link org.apache.lucene.store.IndexInput} on or off-heap
326-
* depending on the provided <code>fstLoadMode</code>
327-
*/
328-
static NRTSuggester load(IndexInput input, FSTLoadMode fstLoadMode) throws IOException {
308+
/** Loads a {@link NRTSuggester} from {@link org.apache.lucene.store.IndexInput} */
309+
static NRTSuggester load(IndexInput input) throws IOException {
329310
final FST<Pair<Long, BytesRef>> fst;
330311
PairOutputs<Long, BytesRef> outputs =
331312
new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
332-
if (shouldLoadFSTOffHeap(input, fstLoadMode)) {
333-
final FST.FSTMetadata<Pair<Long, BytesRef>> fstMetadata = FST.readMetadata(input, outputs);
334-
OffHeapFSTStore store = new OffHeapFSTStore(input, input.getFilePointer(), fstMetadata);
335-
fst = FST.fromFSTReader(fstMetadata, store);
336-
input.skipBytes(store.size());
337-
} else {
338-
fst = new FST<>(FST.readMetadata(input, outputs), input);
339-
}
313+
final FST.FSTMetadata<Pair<Long, BytesRef>> fstMetadata = FST.readMetadata(input, outputs);
314+
OffHeapFSTStore store = new OffHeapFSTStore(input, input.getFilePointer(), fstMetadata);
315+
fst = FST.fromFSTReader(fstMetadata, store);
316+
input.skipBytes(store.size());
340317

341318
/* read some meta info */
342319
int maxAnalyzedPathsPerOutput = input.readVInt();

lucene/suggest/src/java/org/apache/lucene/search/suggest/document/NRTSuggesterBuilder.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,7 @@ public void finishTerm() throws IOException {
9999
entries.clear();
100100
}
101101

102-
/**
103-
* Builds and stores a FST that can be loaded with {@link NRTSuggester#load(IndexInput,
104-
* CompletionPostingsFormat.FSTLoadMode)})}
105-
*/
102+
/** Builds and stores a FST that can be loaded with {@link NRTSuggester#load(IndexInput)})} */
106103
public boolean store(DataOutput output) throws IOException {
107104
final FST<PairOutputs.Pair<Long, BytesRef>> fst =
108105
FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader());

lucene/suggest/src/test/org/apache/lucene/search/suggest/document/TestSuggestField.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import static org.hamcrest.Matchers.containsString;
2121
import static org.hamcrest.Matchers.startsWith;
2222

23-
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
2423
import java.io.ByteArrayOutputStream;
2524
import java.io.IOException;
2625
import java.util.ArrayList;
@@ -949,9 +948,7 @@ static IndexWriterConfig iwcWithSuggestField(Analyzer analyzer, final Set<String
949948
iwc.setMergePolicy(newLogMergePolicy());
950949
Codec filterCodec =
951950
new FilterCodec(TestUtil.getDefaultCodec().getName(), TestUtil.getDefaultCodec()) {
952-
final CompletionPostingsFormat.FSTLoadMode fstLoadMode =
953-
RandomPicks.randomFrom(random(), CompletionPostingsFormat.FSTLoadMode.values());
954-
final PostingsFormat postingsFormat = new Completion101PostingsFormat(fstLoadMode);
951+
final PostingsFormat postingsFormat = new Completion101PostingsFormat();
955952

956953
@Override
957954
public PostingsFormat postingsFormat() {

0 commit comments

Comments
 (0)