Pack file pointers when merging BKD trees #14393
Merged · 6 commits · Mar 25, 2025

Changes from 2 commits
139 changes: 115 additions & 24 deletions lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -28,7 +28,6 @@
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.internal.hppc.LongArrayList;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
@@ -43,9 +42,12 @@
import org.apache.lucene.util.FixedLengthBytesRefArray;
import org.apache.lucene.util.IORunnable;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.bkd.BKDUtil.ByteArrayPredicate;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

// TODO
// - allow variable length byte[] (across docs and dims), but this is quite a bit more hairy
@@ -582,8 +584,16 @@ private IORunnable writeFieldNDims(

scratchBytesRef1.length = config.bytesPerDim();
scratchBytesRef1.bytes = splitPackedValues;
final LongValues longValues =
new LongValues() {
@Override
public long get(long index) {
return leafBlockFPs[(int) index];
}
};

return makeWriter(metaOut, indexOut, splitDimensionValues, leafBlockFPs, dataStartFP);
return makeWriter(
metaOut, indexOut, splitDimensionValues, longValues, leafBlockFPs.length, dataStartFP);
}

/* In the 1D case, we can simply sort points in ascending order and use the
@@ -596,9 +606,12 @@ private IORunnable writeField1Dim(
MutablePointTree reader)
throws IOException {
MutablePointTreeReaderUtils.sort(config, maxDoc, reader, 0, Math.toIntExact(reader.size()));

final int numLeaves =
Math.toIntExact(
(reader.size() + config.maxPointsInLeafNode() - 1) / config.maxPointsInLeafNode());
checkMaxLeafNodeCount(numLeaves);
final OneDimensionBKDWriter oneDimWriter =
new OneDimensionBKDWriter(metaOut, indexOut, dataOut);
new OneDimensionBKDWriter(metaOut, indexOut, dataOut, new ArrayLongAccumulator(numLeaves));
Contributor:
Could/should we use the "packed" impl all the time?

Contributor Author:
We could; I am concerned about the overhead of using it when flushing small segments. Maybe it is not worth the complexity of specializing for that case, and we should use the packed impl all the time. I am OK either way.

Contributor:
Intuitively, there is ~512x (the leaf node size) more data than file pointers, so the bottleneck of flushing/merging should be on the data rather than on the file pointers. A small overhead on file pointers should therefore be OK (hopefully benchmarks will confirm this).

Contributor Author:
Right, let's use it all the time for OneDimensionBKDWriter; it clearly simplifies the implementation.


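A minimal standalone sketch of the packed alternative discussed above. It assumes only Lucene's public PackedLongValues API; the leaf sizes and counts are illustrative, not taken from the patch. Leaf file pointers only ever grow, so a monotonic packed structure can hold them in far fewer than 64 bits each:

```java
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedFilePointersSketch {
  public static void main(String[] args) {
    // Builder specialized for monotonically increasing values.
    PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    long fp = 0;
    for (int leaf = 0; leaf < 1024; leaf++) {
      builder.add(fp); // file pointer of this leaf block
      fp += 512 * 16;  // hypothetical: ~512 points of 16 bytes per leaf
    }
    PackedLongValues fps = builder.build(); // PackedLongValues extends LongValues
    LongValues values = fps;
    System.out.println(values.get(0) + " " + values.get(1023));
    // Accountable: typically far less RAM than the 1024 * 8 bytes of a long[].
    System.out.println(fps.ramBytesUsed() + " bytes");
  }
}
```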
reader.visitDocValues(
new IntersectVisitor() {
@@ -655,7 +668,8 @@ public IORunnable merge(
}
}

OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(metaOut, indexOut, dataOut);
OneDimensionBKDWriter oneDimWriter =
new OneDimensionBKDWriter(metaOut, indexOut, dataOut, new PackedLongAccumulator());

while (queue.size() != 0) {
MergeReader reader = queue.top();
@@ -678,7 +692,7 @@ private class OneDimensionBKDWriter {

final IndexOutput metaOut, indexOut, dataOut;
final long dataStartFP;
final LongArrayList leafBlockFPs = new LongArrayList();
final LongAccumulator accumulator;
final FixedLengthBytesRefArray leafBlockStartValues =
new FixedLengthBytesRefArray(config.packedIndexBytesLength());
final byte[] leafValues = new byte[config.maxPointsInLeafNode() * config.packedBytesLength()];
@@ -687,7 +701,11 @@ private class OneDimensionBKDWriter {
private int leafCount;
private int leafCardinality;

OneDimensionBKDWriter(IndexOutput metaOut, IndexOutput indexOut, IndexOutput dataOut) {
OneDimensionBKDWriter(
IndexOutput metaOut,
IndexOutput indexOut,
IndexOutput dataOut,
LongAccumulator accumulator) {
if (config.numIndexDims() != 1) {
throw new UnsupportedOperationException(
"config.numIndexDims() must be 1 but got " + config.numIndexDims());
@@ -708,7 +726,7 @@ private class OneDimensionBKDWriter {
this.indexOut = indexOut;
this.dataOut = dataOut;
this.dataStartFP = dataOut.getFilePointer();

this.accumulator = accumulator;
lastPackedValue = new byte[config.packedBytesLength()];
}

@@ -772,12 +790,13 @@ public IORunnable finish() throws IOException {

scratchBytesRef1.length = config.packedIndexBytesLength();
scratchBytesRef1.offset = 0;
assert leafBlockStartValues.size() + 1 == leafBlockFPs.size();
assert leafBlockStartValues.size() + 1 == accumulator.size();
final LongValues longValues = accumulator.getValues();
BKDTreeLeafNodes leafNodes =
new BKDTreeLeafNodes() {
@Override
public long getLeafLP(int index) {
return leafBlockFPs.get(index);
return longValues.get(index);
}

@Override
@@ -792,7 +811,7 @@ public int getSplitDimension(int index) {

@Override
public int numLeaves() {
return leafBlockFPs.size();
return Math.toIntExact(accumulator.size());
}
};
return () -> {
@@ -814,16 +833,16 @@ private void writeLeafBlock(int leafCardinality) throws IOException {

valueCount += leafCount;

if (leafBlockFPs.size() > 0) {
if (accumulator.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split
// value index in the end:
scratchBytesRef1.bytes = leafValues;
scratchBytesRef1.offset = 0;
scratchBytesRef1.length = config.packedIndexBytesLength();
leafBlockStartValues.append(scratchBytesRef1);
}
leafBlockFPs.add(dataOut.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
accumulator.add(dataOut.getFilePointer());
checkMaxLeafNodeCount(Math.toIntExact(accumulator.size()));

// Find per-dim common prefix:
commonPrefixLengths[0] =
@@ -858,6 +877,65 @@ assert valuesInOrderAndBounds(
}
}

private interface LongAccumulator {
void add(long value);

long size();

LongValues getValues();
}

private static class ArrayLongAccumulator implements LongAccumulator {

final long[] values;
int count;

ArrayLongAccumulator(int size) {
this.values = new long[size];
}

@Override
public void add(long value) {
values[count++] = value;
}

@Override
public long size() {
return count;
}

@Override
public LongValues getValues() {
assert count == values.length;
return new LongValues() {
@Override
public long get(long index) {
return values[(int) index];
}
};
}
}

private static class PackedLongAccumulator implements LongAccumulator {
private final PackedLongValues.Builder values =
PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

@Override
public void add(long value) {
values.add(value);
}

@Override
public long size() {
return values.size();
}

@Override
public LongValues getValues() {
return values.build();
}
}

private int getNumLeftLeafNodes(int numLeaves) {
assert numLeaves > 1 : "getNumLeftLeaveNodes() called with " + numLeaves;
// return the level that can be filled with this number of leaves
@@ -955,7 +1033,7 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput

// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
// 7)
long[] leafBlockFPs = new long[numLeaves];
PackedLongAccumulator leafBlockFPs = new PackedLongAccumulator();

// Make sure the math above "worked":
assert pointCount / numLeaves <= config.maxPointsInLeafNode()
@@ -987,8 +1065,10 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
numLeaves,
new int[config.maxPointsInLeafNode()]);
assert Arrays.equals(parentSplits, new int[config.numIndexDims()]);
assert leafBlockFPs.size() == numLeaves;

// If no exception, we should have cleaned everything up:
assert tempDir.getCreatedFiles().isEmpty();
@@ -1002,22 +1082,30 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
}
}

LongValues values = leafBlockFPs.getValues();
scratchBytesRef1.bytes = splitPackedValues;
scratchBytesRef1.length = config.bytesPerDim();
return makeWriter(metaOut, indexOut, splitDimensionValues, leafBlockFPs, dataStartFP);
return makeWriter(
metaOut,
indexOut,
splitDimensionValues,
values,
Math.toIntExact(leafBlockFPs.size()),
dataStartFP);
}

private IORunnable makeWriter(
IndexOutput metaOut,
IndexOutput indexOut,
byte[] splitDimensionValues,
long[] leafBlockFPs,
LongValues leafBlockFPs,
int numLeaves,
long dataStartFP) {
BKDTreeLeafNodes leafNodes =
new BKDTreeLeafNodes() {
@Override
public long getLeafLP(int index) {
return leafBlockFPs[index];
return leafBlockFPs.get(index);
}

@Override
@@ -1033,7 +1121,7 @@ public int getSplitDimension(int index) {

@Override
public int numLeaves() {
return leafBlockFPs.length;
return numLeaves;
}
};

@@ -1903,7 +1991,8 @@ private void build(
int[] parentSplits,
byte[] splitPackedValues,
byte[] splitDimensionValues,
long[] leafBlockFPs,
LongAccumulator leafBlockFPs,
int totalNumLeaves,
int[] spareDocIds)
throws IOException {

@@ -1961,7 +2050,7 @@ private void build(
int leafCardinality = heapSource.computeCardinality(from, to, commonPrefixLengths);

// Save the block file pointer:
leafBlockFPs[leavesOffset] = out.getFilePointer();
leafBlockFPs.add(out.getFilePointer());
Member:
Ah, since file pointers are monotonic, we can make them compact. NICE!
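A rough illustration of why monotonicity allows compact storage. This is a simplified sketch of the idea, not Lucene's exact encoding: store the minimum and the average slope once, then pack only the small per-entry deviations from that line.

```java
public class MonotonicPackingSketch {
  public static void main(String[] args) {
    long[] fps = {0, 8200, 16380, 24600, 32760}; // hypothetical leaf file pointers
    long min = fps[0];
    double avgDelta = (double) (fps[fps.length - 1] - min) / (fps.length - 1);
    long maxDeviation = 0;
    for (int i = 0; i < fps.length; i++) {
      long expected = min + (long) (avgDelta * i); // value predicted by the line
      maxDeviation = Math.max(maxDeviation, Math.abs(fps[i] - expected));
    }
    // Each entry now needs about log2(2 * maxDeviation + 1) bits instead of 64.
    System.out.println("max deviation = " + maxDeviation);
  }
}
```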

// System.out.println(" write leaf block @ fp=" + out.getFilePointer());

// Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
@@ -2003,16 +2092,16 @@ assert valuesInOrderAndBounds(
// split dimensions. Because it is an expensive operation, the frequency we recompute the
// bounds is given
// by SPLITS_BEFORE_EXACT_BOUNDS.
if (numLeaves != leafBlockFPs.length
if (numLeaves != totalNumLeaves
&& config.numIndexDims() > 2
&& Arrays.stream(parentSplits).sum() % SPLITS_BEFORE_EXACT_BOUNDS == 0) {
computePackedValueBounds(points, minPackedValue, maxPackedValue);
}
splitDim = split(minPackedValue, maxPackedValue, parentSplits);
}

assert numLeaves <= leafBlockFPs.length
: "numLeaves=" + numLeaves + " leafBlockFPs.length=" + leafBlockFPs.length;
assert numLeaves <= totalNumLeaves
: "numLeaves=" + numLeaves + " totalNumLeaves=" + totalNumLeaves;

// How many leaves will be in the left tree:
final int numLeftLeafNodes = getNumLeftLeafNodes(numLeaves);
@@ -2078,6 +2167,7 @@ assert valuesInOrderAndBounds(
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
totalNumLeaves,
spareDocIds);

// Recurse on right tree:
Expand All @@ -2093,6 +2183,7 @@ assert valuesInOrderAndBounds(
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
totalNumLeaves,
spareDocIds);

parentSplits[splitDim]--;