Skip to content

Commit 9472dca

Browse files
authored
Decode doc ids in BKD leaves with auto-vectorized loops (#14203)
1 parent 565d0e7 commit 9472dca

File tree

8 files changed

+272
-61
lines changed

8 files changed

+272
-61
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,8 @@ Optimizations
178178

179179
* GITHUB#14301: Avoid unnecessary evaluations and skipping documents. (hanbj)
180180

181+
* GITHUB#14203: Decode doc ids in BKD leaves with auto-vectorized loops when using DEFAULT_MAX_POINTS_IN_LEAF_NODE. (Guo Feng)
182+
181183
Bug Fixes
182184
---------------------
183185

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,13 @@
1717
package org.apache.lucene.codecs.lucene90;
1818

1919
import java.io.IOException;
20+
import java.util.Map;
2021
import org.apache.lucene.codecs.PointsFormat;
2122
import org.apache.lucene.codecs.PointsReader;
2223
import org.apache.lucene.codecs.PointsWriter;
2324
import org.apache.lucene.index.SegmentReadState;
2425
import org.apache.lucene.index.SegmentWriteState;
26+
import org.apache.lucene.util.bkd.BKDWriter;
2527

2628
/**
2729
* Lucene 9.0 point format, which encodes dimensional values in a block KD-tree structure for fast
@@ -59,18 +61,40 @@ public final class Lucene90PointsFormat extends PointsFormat {
5961
public static final String META_EXTENSION = "kdm";
6062

6163
static final int VERSION_START = 0;
62-
static final int VERSION_CURRENT = VERSION_START;
64+
static final int VERSION_BKD_VECTORIZED_BPV24 = 1;
65+
static final int VERSION_CURRENT = VERSION_BKD_VECTORIZED_BPV24;
66+
67+
private static final Map<Integer, Integer> VERSION_TO_BKD_VERSION =
68+
Map.of(
69+
VERSION_START, BKDWriter.VERSION_META_FILE,
70+
VERSION_BKD_VECTORIZED_BPV24, BKDWriter.VERSION_VECTORIZED_DOCID);
71+
72+
private final int version;
6373

6474
/** Sole constructor */
65-
public Lucene90PointsFormat() {}
75+
public Lucene90PointsFormat() {
76+
this(VERSION_CURRENT);
77+
}
78+
79+
/** Constructor that takes a version. This is used for testing with older versions. */
80+
Lucene90PointsFormat(int version) {
81+
if (VERSION_TO_BKD_VERSION.containsKey(version) == false) {
82+
throw new IllegalArgumentException("Invalid version: " + version);
83+
}
84+
this.version = version;
85+
}
6686

6787
@Override
6888
public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException {
69-
return new Lucene90PointsWriter(state);
89+
return new Lucene90PointsWriter(state, version);
7090
}
7191

7292
@Override
7393
public PointsReader fieldsReader(SegmentReadState state) throws IOException {
7494
return new Lucene90PointsReader(state);
7595
}
96+
97+
static int bkdVersion(int version) {
98+
return VERSION_TO_BKD_VERSION.get(version);
99+
}
76100
}

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,16 +46,18 @@ public class Lucene90PointsWriter extends PointsWriter {
4646
final SegmentWriteState writeState;
4747
final int maxPointsInLeafNode;
4848
final double maxMBSortInHeap;
49+
final int version;
4950
private boolean finished;
5051

5152
/** Full constructor */
5253
public Lucene90PointsWriter(
53-
SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap)
54+
SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap, int version)
5455
throws IOException {
5556
assert writeState.fieldInfos.hasPointValues();
5657
this.writeState = writeState;
5758
this.maxPointsInLeafNode = maxPointsInLeafNode;
5859
this.maxMBSortInHeap = maxMBSortInHeap;
60+
this.version = version;
5961
String dataFileName =
6062
IndexFileNames.segmentFileName(
6163
writeState.segmentInfo.name,
@@ -105,6 +107,12 @@ public Lucene90PointsWriter(
105107
}
106108
}
107109

110+
public Lucene90PointsWriter(
111+
SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap)
112+
throws IOException {
113+
this(writeState, maxPointsInLeafNode, maxMBSortInHeap, Lucene90PointsFormat.VERSION_CURRENT);
114+
}
115+
108116
/**
109117
* Uses the default values for {@code maxPointsInLeafNode} (512) and {@code maxMBSortInHeap}
110118
* (16.0)
@@ -113,7 +121,17 @@ public Lucene90PointsWriter(SegmentWriteState writeState) throws IOException {
113121
this(
114122
writeState,
115123
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
116-
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP);
124+
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
125+
Lucene90PointsFormat.VERSION_CURRENT);
126+
}
127+
128+
/** Constructor that takes a version. This is used for testing with older versions. */
129+
Lucene90PointsWriter(SegmentWriteState writeState, int version) throws IOException {
130+
this(
131+
writeState,
132+
BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE,
133+
BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP,
134+
version);
117135
}
118136

119137
@Override
@@ -135,7 +153,8 @@ public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOExcept
135153
writeState.segmentInfo.name,
136154
config,
137155
maxMBSortInHeap,
138-
values.size())) {
156+
values.size(),
157+
Lucene90PointsFormat.bkdVersion(version))) {
139158

140159
if (values instanceof MutablePointTree) {
141160
IORunnable finalizer =
@@ -233,7 +252,8 @@ public void merge(MergeState mergeState) throws IOException {
233252
writeState.segmentInfo.name,
234253
config,
235254
maxMBSortInHeap,
236-
totMaxSize)) {
255+
totMaxSize,
256+
Lucene90PointsFormat.bkdVersion(version))) {
237257
List<PointValues> pointValues = new ArrayList<>();
238258
List<MergeState.DocMap> docMaps = new ArrayList<>();
239259
for (int i = 0; i < mergeState.pointsReaders.length; i++) {

lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@
3434
* @lucene.experimental
3535
*/
3636
public class BKDReader extends PointValues {
37-
3837
final BKDConfig config;
3938
final int numLeaves;
4039
final IndexInput in;
@@ -269,7 +268,7 @@ private BKDPointTree(
269268
1,
270269
minPackedValue,
271270
maxPackedValue,
272-
new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode()),
271+
new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode(), version),
273272
new byte[config.packedBytesLength()],
274273
new byte[config.packedIndexBytesLength()],
275274
new byte[config.packedIndexBytesLength()],
@@ -598,7 +597,8 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I
598597
// How many points are stored in this leaf cell:
599598
int count = leafNodes.readVInt();
600599
// No need to call grow(), it has been called up-front
601-
docIdsWriter.readInts(leafNodes, count, visitor);
600+
// Borrow scratchIterator.docIDs as the decoding buffer
601+
docIdsWriter.readInts(leafNodes, count, visitor, scratchIterator.docIDs);
602602
} else {
603603
pushLeft();
604604
addAll(visitor, grown);
@@ -1036,9 +1036,9 @@ private static class BKDReaderDocIDSetIterator extends DocIdSetIterator {
10361036
final int[] docIDs;
10371037
private final DocIdsWriter docIdsWriter;
10381038

1039-
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) {
1039+
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode, int version) {
10401040
this.docIDs = new int[maxPointsInLeafNode];
1041-
this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode);
1041+
this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode, version);
10421042
}
10431043

10441044
@Override

lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,8 @@ public class BKDWriter implements Closeable {
8585
public static final int VERSION_SELECTIVE_INDEXING = 6;
8686
public static final int VERSION_LOW_CARDINALITY_LEAVES = 7;
8787
public static final int VERSION_META_FILE = 9;
88-
public static final int VERSION_CURRENT = VERSION_META_FILE;
88+
public static final int VERSION_VECTORIZED_DOCID = 10;
89+
public static final int VERSION_CURRENT = VERSION_VECTORIZED_DOCID;
8990

9091
/** Number of splits before we compute the exact bounding box of an inner node. */
9192
private static final int SPLITS_BEFORE_EXACT_BOUNDS = 4;
@@ -103,6 +104,7 @@ public class BKDWriter implements Closeable {
103104
final TrackingDirectoryWrapper tempDir;
104105
final String tempFileNamePrefix;
105106
final double maxMBSortInHeap;
107+
final int version;
106108

107109
final byte[] scratchDiff;
108110
final byte[] scratch;
@@ -139,6 +141,29 @@ public BKDWriter(
139141
BKDConfig config,
140142
double maxMBSortInHeap,
141143
long totalPointCount) {
144+
this(
145+
maxDoc,
146+
tempDir,
147+
tempFileNamePrefix,
148+
config,
149+
maxMBSortInHeap,
150+
totalPointCount,
151+
BKDWriter.VERSION_CURRENT);
152+
}
153+
154+
/** This constructor should only be used for testing with older versions. */
155+
public BKDWriter(
156+
int maxDoc,
157+
Directory tempDir,
158+
String tempFileNamePrefix,
159+
BKDConfig config,
160+
double maxMBSortInHeap,
161+
long totalPointCount,
162+
int version) {
163+
if (version < VERSION_START || version > VERSION_CURRENT) {
164+
throw new IllegalArgumentException("Version out of range: " + version);
165+
}
166+
this.version = version;
142167
verifyParams(maxMBSortInHeap, totalPointCount);
143168
// We use tracking dir to deal with removing files on exception, so each place that
144169
// creates temp files doesn't need crazy try/finally/success logic:
@@ -165,7 +190,7 @@ public BKDWriter(
165190

166191
// Maximum number of points we hold in memory at any time
167192
maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc()));
168-
docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode());
193+
docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode(), version);
169194
// Finally, we must be able to hold at least the leaf node in heap during build:
170195
if (maxPointsSortInHeap < config.maxPointsInLeafNode()) {
171196
throw new IllegalArgumentException(
@@ -1245,7 +1270,7 @@ private void writeIndex(
12451270
byte[] packedIndex,
12461271
long dataStartFP)
12471272
throws IOException {
1248-
CodecUtil.writeHeader(metaOut, CODEC_NAME, VERSION_CURRENT);
1273+
CodecUtil.writeHeader(metaOut, CODEC_NAME, version);
12491274
metaOut.writeVInt(config.numDims());
12501275
metaOut.writeVInt(config.numIndexDims());
12511276
metaOut.writeVInt(countPerLeaf);

0 commit comments

Comments
 (0)