Pack file pointers when merging BKD trees #14393
Merged · 6 commits · Mar 25, 2025

Changes from 2 commits
139 changes: 115 additions & 24 deletions lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java
@@ -28,7 +28,6 @@
import org.apache.lucene.index.PointValues;
import org.apache.lucene.index.PointValues.IntersectVisitor;
import org.apache.lucene.index.PointValues.Relation;
import org.apache.lucene.internal.hppc.LongArrayList;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.DataOutput;
@@ -43,9 +42,12 @@
import org.apache.lucene.util.FixedLengthBytesRefArray;
import org.apache.lucene.util.IORunnable;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.bkd.BKDUtil.ByteArrayPredicate;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

// TODO
// - allow variable length byte[] (across docs and dims), but this is quite a bit more hairy
@@ -582,8 +584,16 @@ private IORunnable writeFieldNDims(

scratchBytesRef1.length = config.bytesPerDim();
scratchBytesRef1.bytes = splitPackedValues;
final LongValues longValues =
new LongValues() {
@Override
public long get(long index) {
return leafBlockFPs[(int) index];
}
};

return makeWriter(metaOut, indexOut, splitDimensionValues, leafBlockFPs, dataStartFP);
return makeWriter(
metaOut, indexOut, splitDimensionValues, longValues, leafBlockFPs.length, dataStartFP);
}

/* In the 1D case, we can simply sort points in ascending order and use the
@@ -596,9 +606,12 @@ private IORunnable writeField1Dim(
MutablePointTree reader)
throws IOException {
MutablePointTreeReaderUtils.sort(config, maxDoc, reader, 0, Math.toIntExact(reader.size()));

final int numLeaves =
Math.toIntExact(
(reader.size() + config.maxPointsInLeafNode() - 1) / config.maxPointsInLeafNode());
checkMaxLeafNodeCount(numLeaves);
final OneDimensionBKDWriter oneDimWriter =
new OneDimensionBKDWriter(metaOut, indexOut, dataOut);
new OneDimensionBKDWriter(metaOut, indexOut, dataOut, new ArrayLongAccumulator(numLeaves));
Contributor:
Could/should we use the "packed" impl all the time?

Contributor Author:
We could; I am concerned about the overhead of using it when flushing small segments. Maybe it is not worth the complexity of specializing for that case, and we should use the packed impl all the time. I am OK either way.

Contributor:
Intuitively, there is ~512x (the leaf node size) more data than file pointers, so the bottleneck of flushing/merging should be on the data rather than on the file pointers. A small overhead on file pointers should therefore be OK (hopefully benchmarks will confirm this).

Contributor Author:
Right, let's use it all the time for OneDimensionBKDWriter; it clearly simplifies the implementation.


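A minimal standalone sketch of the packed alternative discussed above. It assumes only Lucene's public PackedLongValues API; the leaf sizes and counts are illustrative, not taken from the patch. Leaf file pointers only ever grow, so a monotonic packed structure can hold them in far fewer than 64 bits each:

```java
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;

public class PackedFilePointersSketch {
  public static void main(String[] args) {
    // Builder specialized for monotonically increasing values.
    PackedLongValues.Builder builder = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
    long fp = 0;
    for (int leaf = 0; leaf < 1024; leaf++) {
      builder.add(fp); // file pointer of this leaf block
      fp += 512 * 16;  // hypothetical: ~512 points of 16 bytes per leaf
    }
    PackedLongValues fps = builder.build(); // PackedLongValues extends LongValues
    LongValues values = fps;
    System.out.println(values.get(0) + " " + values.get(1023));
    // Accountable: typically far less RAM than the 1024 * 8 bytes of a long[].
    System.out.println(fps.ramBytesUsed() + " bytes");
  }
}
```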
reader.visitDocValues(
new IntersectVisitor() {
@@ -655,7 +668,8 @@ public IORunnable merge(
}
}

OneDimensionBKDWriter oneDimWriter = new OneDimensionBKDWriter(metaOut, indexOut, dataOut);
OneDimensionBKDWriter oneDimWriter =
new OneDimensionBKDWriter(metaOut, indexOut, dataOut, new PackedLongAccumulator());

while (queue.size() != 0) {
MergeReader reader = queue.top();
@@ -678,7 +692,7 @@ private class OneDimensionBKDWriter {

final IndexOutput metaOut, indexOut, dataOut;
final long dataStartFP;
final LongArrayList leafBlockFPs = new LongArrayList();
final LongAccumulator accumulator;
final FixedLengthBytesRefArray leafBlockStartValues =
new FixedLengthBytesRefArray(config.packedIndexBytesLength());
final byte[] leafValues = new byte[config.maxPointsInLeafNode() * config.packedBytesLength()];
@@ -687,7 +701,11 @@ private class OneDimensionBKDWriter {
private int leafCount;
private int leafCardinality;

OneDimensionBKDWriter(IndexOutput metaOut, IndexOutput indexOut, IndexOutput dataOut) {
OneDimensionBKDWriter(
IndexOutput metaOut,
IndexOutput indexOut,
IndexOutput dataOut,
LongAccumulator accumulator) {
if (config.numIndexDims() != 1) {
throw new UnsupportedOperationException(
"config.numIndexDims() must be 1 but got " + config.numIndexDims());
@@ -708,7 +726,7 @@ private class OneDimensionBKDWriter {
this.indexOut = indexOut;
this.dataOut = dataOut;
this.dataStartFP = dataOut.getFilePointer();

this.accumulator = accumulator;
lastPackedValue = new byte[config.packedBytesLength()];
}

@@ -772,12 +790,13 @@ public IORunnable finish() throws IOException {

scratchBytesRef1.length = config.packedIndexBytesLength();
scratchBytesRef1.offset = 0;
assert leafBlockStartValues.size() + 1 == leafBlockFPs.size();
assert leafBlockStartValues.size() + 1 == accumulator.size();
final LongValues longValues = accumulator.getValues();
BKDTreeLeafNodes leafNodes =
new BKDTreeLeafNodes() {
@Override
public long getLeafLP(int index) {
return leafBlockFPs.get(index);
return longValues.get(index);
}

@Override
@@ -792,7 +811,7 @@ public int getSplitDimension(int index) {

@Override
public int numLeaves() {
return leafBlockFPs.size();
return Math.toIntExact(accumulator.size());
}
};
return () -> {
@@ -814,16 +833,16 @@ private void writeLeafBlock(int leafCardinality) throws IOException {

valueCount += leafCount;

if (leafBlockFPs.size() > 0) {
if (accumulator.size() > 0) {
// Save the first (minimum) value in each leaf block except the first, to build the split
// value index in the end:
scratchBytesRef1.bytes = leafValues;
scratchBytesRef1.offset = 0;
scratchBytesRef1.length = config.packedIndexBytesLength();
leafBlockStartValues.append(scratchBytesRef1);
}
leafBlockFPs.add(dataOut.getFilePointer());
checkMaxLeafNodeCount(leafBlockFPs.size());
accumulator.add(dataOut.getFilePointer());
checkMaxLeafNodeCount(Math.toIntExact(accumulator.size()));

// Find per-dim common prefix:
commonPrefixLengths[0] =
@@ -858,6 +877,65 @@ assert valuesInOrderAndBounds(
}
}

private interface LongAccumulator {
void add(long value);

long size();

LongValues getValues();
}

private static class ArrayLongAccumulator implements LongAccumulator {

final long[] values;
int count;

ArrayLongAccumulator(int size) {
this.values = new long[size];
}

@Override
public void add(long value) {
values[count++] = value;
}

@Override
public long size() {
return count;
}

@Override
public LongValues getValues() {
assert count == values.length;
return new LongValues() {
@Override
public long get(long index) {
return values[(int) index];
}
};
}
}

private static class PackedLongAccumulator implements LongAccumulator {
private final PackedLongValues.Builder values =
PackedLongValues.monotonicBuilder(PackedInts.COMPACT);

@Override
public void add(long value) {
values.add(value);
}

@Override
public long size() {
return values.size();
}

@Override
public LongValues getValues() {
return values.build();
}
}

private int getNumLeftLeafNodes(int numLeaves) {
assert numLeaves > 1 : "getNumLeftLeaveNodes() called with " + numLeaves;
// return the level that can be filled with this number of leaves
@@ -955,7 +1033,7 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput

// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
// 7)
long[] leafBlockFPs = new long[numLeaves];
PackedLongAccumulator leafBlockFPs = new PackedLongAccumulator();

// Make sure the math above "worked":
assert pointCount / numLeaves <= config.maxPointsInLeafNode()
@@ -987,8 +1065,10 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
numLeaves,
new int[config.maxPointsInLeafNode()]);
assert Arrays.equals(parentSplits, new int[config.numIndexDims()]);
assert leafBlockFPs.size() == numLeaves;

// If no exception, we should have cleaned everything up:
assert tempDir.getCreatedFiles().isEmpty();
@@ -1002,22 +1082,30 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
}
}

LongValues values = leafBlockFPs.getValues();
scratchBytesRef1.bytes = splitPackedValues;
scratchBytesRef1.length = config.bytesPerDim();
return makeWriter(metaOut, indexOut, splitDimensionValues, leafBlockFPs, dataStartFP);
return makeWriter(
metaOut,
indexOut,
splitDimensionValues,
values,
Math.toIntExact(leafBlockFPs.size()),
dataStartFP);
}

private IORunnable makeWriter(
IndexOutput metaOut,
IndexOutput indexOut,
byte[] splitDimensionValues,
long[] leafBlockFPs,
LongValues leafBlockFPs,
int numLeaves,
long dataStartFP) {
BKDTreeLeafNodes leafNodes =
new BKDTreeLeafNodes() {
@Override
public long getLeafLP(int index) {
return leafBlockFPs[index];
return leafBlockFPs.get(index);
}

@Override
@@ -1033,7 +1121,7 @@ public int getSplitDimension(int index) {

@Override
public int numLeaves() {
return leafBlockFPs.length;
return numLeaves;
}
};

@@ -1903,7 +1991,8 @@ private void build(
int[] parentSplits,
byte[] splitPackedValues,
byte[] splitDimensionValues,
long[] leafBlockFPs,
LongAccumulator leafBlockFPs,
int totalNumLeaves,
int[] spareDocIds)
throws IOException {

@@ -1961,7 +2050,7 @@ private void build(
int leafCardinality = heapSource.computeCardinality(from, to, commonPrefixLengths);

// Save the block file pointer:
leafBlockFPs[leavesOffset] = out.getFilePointer();
leafBlockFPs.add(out.getFilePointer());
Member:
Ah, since file pointers are monotonic, we can make them compact. NICE!
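A rough illustration of why monotonicity allows compact storage. This is a simplified sketch of the idea, not Lucene's exact encoding: store the minimum and the average slope once, then pack only the small per-entry deviations from that line.

```java
public class MonotonicPackingSketch {
  public static void main(String[] args) {
    long[] fps = {0, 8200, 16380, 24600, 32760}; // hypothetical leaf file pointers
    long min = fps[0];
    double avgDelta = (double) (fps[fps.length - 1] - min) / (fps.length - 1);
    long maxDeviation = 0;
    for (int i = 0; i < fps.length; i++) {
      long expected = min + (long) (avgDelta * i); // value predicted by the line
      maxDeviation = Math.max(maxDeviation, Math.abs(fps[i] - expected));
    }
    // Each entry now needs about log2(2 * maxDeviation + 1) bits instead of 64.
    System.out.println("max deviation = " + maxDeviation);
  }
}
```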

// System.out.println(" write leaf block @ fp=" + out.getFilePointer());

// Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
@@ -2003,16 +2092,16 @@ assert valuesInOrderAndBounds(
// split dimensions. Because it is an expensive operation, the frequency we recompute the
// bounds is given
// by SPLITS_BEFORE_EXACT_BOUNDS.
if (numLeaves != leafBlockFPs.length
if (numLeaves != totalNumLeaves
&& config.numIndexDims() > 2
&& Arrays.stream(parentSplits).sum() % SPLITS_BEFORE_EXACT_BOUNDS == 0) {
computePackedValueBounds(points, minPackedValue, maxPackedValue);
}
splitDim = split(minPackedValue, maxPackedValue, parentSplits);
}

assert numLeaves <= leafBlockFPs.length
: "numLeaves=" + numLeaves + " leafBlockFPs.length=" + leafBlockFPs.length;
assert numLeaves <= totalNumLeaves
: "numLeaves=" + numLeaves + " totalNumLeaves=" + totalNumLeaves;

// How many leaves will be in the left tree:
final int numLeftLeafNodes = getNumLeftLeafNodes(numLeaves);
@@ -2078,6 +2167,7 @@ assert valuesInOrderAndBounds(
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
totalNumLeaves,
spareDocIds);

// Recurse on right tree:
Expand All @@ -2093,6 +2183,7 @@ assert valuesInOrderAndBounds(
splitPackedValues,
splitDimensionValues,
leafBlockFPs,
totalNumLeaves,
spareDocIds);

parentSplits[splitDim]--;