28
28
import org .apache .lucene .index .PointValues ;
29
29
import org .apache .lucene .index .PointValues .IntersectVisitor ;
30
30
import org .apache .lucene .index .PointValues .Relation ;
31
- import org .apache .lucene .internal .hppc .LongArrayList ;
32
31
import org .apache .lucene .store .ByteBuffersDataOutput ;
33
32
import org .apache .lucene .store .ChecksumIndexInput ;
34
33
import org .apache .lucene .store .DataOutput ;
43
42
import org .apache .lucene .util .FixedLengthBytesRefArray ;
44
43
import org .apache .lucene .util .IORunnable ;
45
44
import org .apache .lucene .util .IOUtils ;
45
+ import org .apache .lucene .util .LongValues ;
46
46
import org .apache .lucene .util .NumericUtils ;
47
47
import org .apache .lucene .util .PriorityQueue ;
48
48
import org .apache .lucene .util .bkd .BKDUtil .ByteArrayPredicate ;
49
+ import org .apache .lucene .util .packed .PackedInts ;
50
+ import org .apache .lucene .util .packed .PackedLongValues ;
49
51
50
52
// TODO
51
53
// - allow variable length byte[] (across docs and dims), but this is quite a bit more hairy
@@ -582,8 +584,21 @@ private IORunnable writeFieldNDims(
582
584
583
585
scratchBytesRef1 .length = config .bytesPerDim ();
584
586
scratchBytesRef1 .bytes = splitPackedValues ;
587
+ final LongValues leafFPLongValues =
588
+ new LongValues () {
589
+ @ Override
590
+ public long get (long index ) {
591
+ return leafBlockFPs [(int ) index ];
592
+ }
593
+ };
585
594
586
- return makeWriter (metaOut , indexOut , splitDimensionValues , leafBlockFPs , dataStartFP );
595
+ return makeWriter (
596
+ metaOut ,
597
+ indexOut ,
598
+ splitDimensionValues ,
599
+ leafFPLongValues ,
600
+ leafBlockFPs .length ,
601
+ dataStartFP );
587
602
}
588
603
589
604
/* In the 1D case, we can simply sort points in ascending order and use the
@@ -678,7 +693,8 @@ private class OneDimensionBKDWriter {
678
693
679
694
final IndexOutput metaOut , indexOut , dataOut ;
680
695
final long dataStartFP ;
681
- final LongArrayList leafBlockFPs = new LongArrayList ();
696
+ private final PackedLongValues .Builder leafBlockFPs =
697
+ PackedLongValues .monotonicBuilder (PackedInts .COMPACT );
682
698
final FixedLengthBytesRefArray leafBlockStartValues =
683
699
new FixedLengthBytesRefArray (config .packedIndexBytesLength ());
684
700
final byte [] leafValues = new byte [config .maxPointsInLeafNode () * config .packedBytesLength ()];
@@ -708,7 +724,6 @@ private class OneDimensionBKDWriter {
708
724
this .indexOut = indexOut ;
709
725
this .dataOut = dataOut ;
710
726
this .dataStartFP = dataOut .getFilePointer ();
711
-
712
727
lastPackedValue = new byte [config .packedBytesLength ()];
713
728
}
714
729
@@ -773,11 +788,12 @@ public IORunnable finish() throws IOException {
773
788
scratchBytesRef1 .length = config .packedIndexBytesLength ();
774
789
scratchBytesRef1 .offset = 0 ;
775
790
assert leafBlockStartValues .size () + 1 == leafBlockFPs .size ();
791
+ final LongValues leafFPLongValues = leafBlockFPs .build ();
776
792
BKDTreeLeafNodes leafNodes =
777
793
new BKDTreeLeafNodes () {
778
794
@ Override
779
795
public long getLeafLP (int index ) {
780
- return leafBlockFPs .get (index );
796
+ return leafFPLongValues .get (index );
781
797
}
782
798
783
799
@ Override
@@ -792,7 +808,7 @@ public int getSplitDimension(int index) {
792
808
793
809
@ Override
794
810
public int numLeaves () {
795
- return leafBlockFPs .size ();
811
+ return Math . toIntExact ( leafBlockFPs .size () );
796
812
}
797
813
};
798
814
return () -> {
@@ -823,7 +839,7 @@ private void writeLeafBlock(int leafCardinality) throws IOException {
823
839
leafBlockStartValues .append (scratchBytesRef1 );
824
840
}
825
841
leafBlockFPs .add (dataOut .getFilePointer ());
826
- checkMaxLeafNodeCount (leafBlockFPs .size ());
842
+ checkMaxLeafNodeCount (Math . toIntExact ( leafBlockFPs .size () ));
827
843
828
844
// Find per-dim common prefix:
829
845
commonPrefixLengths [0 ] =
@@ -955,7 +971,8 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
955
971
956
972
// +1 because leaf count is power of 2 (e.g. 8), and innerNodeCount is power of 2 minus 1 (e.g.
957
973
// 7)
958
- long [] leafBlockFPs = new long [numLeaves ];
974
+ final PackedLongValues .Builder leafBlockFPs =
975
+ PackedLongValues .monotonicBuilder (PackedInts .COMPACT );
959
976
960
977
// Make sure the math above "worked":
961
978
assert pointCount / numLeaves <= config .maxPointsInLeafNode ()
@@ -987,8 +1004,10 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
987
1004
splitPackedValues ,
988
1005
splitDimensionValues ,
989
1006
leafBlockFPs ,
1007
+ numLeaves ,
990
1008
new int [config .maxPointsInLeafNode ()]);
991
1009
assert Arrays .equals (parentSplits , new int [config .numIndexDims ()]);
1010
+ assert leafBlockFPs .size () == numLeaves ;
992
1011
993
1012
// If no exception, we should have cleaned everything up:
994
1013
assert tempDir .getCreatedFiles ().isEmpty ();
@@ -1002,22 +1021,30 @@ public IORunnable finish(IndexOutput metaOut, IndexOutput indexOut, IndexOutput
1002
1021
}
1003
1022
}
1004
1023
1024
+ LongValues leafBlockLongValues = leafBlockFPs .build ();
1005
1025
scratchBytesRef1 .bytes = splitPackedValues ;
1006
1026
scratchBytesRef1 .length = config .bytesPerDim ();
1007
- return makeWriter (metaOut , indexOut , splitDimensionValues , leafBlockFPs , dataStartFP );
1027
+ return makeWriter (
1028
+ metaOut ,
1029
+ indexOut ,
1030
+ splitDimensionValues ,
1031
+ leafBlockLongValues ,
1032
+ Math .toIntExact (leafBlockFPs .size ()),
1033
+ dataStartFP );
1008
1034
}
1009
1035
1010
1036
private IORunnable makeWriter (
1011
1037
IndexOutput metaOut ,
1012
1038
IndexOutput indexOut ,
1013
1039
byte [] splitDimensionValues ,
1014
- long [] leafBlockFPs ,
1040
+ LongValues leafBlockFPs ,
1041
+ int numLeaves ,
1015
1042
long dataStartFP ) {
1016
1043
BKDTreeLeafNodes leafNodes =
1017
1044
new BKDTreeLeafNodes () {
1018
1045
@ Override
1019
1046
public long getLeafLP (int index ) {
1020
- return leafBlockFPs [ index ] ;
1047
+ return leafBlockFPs . get ( index ) ;
1021
1048
}
1022
1049
1023
1050
@ Override
@@ -1033,7 +1060,7 @@ public int getSplitDimension(int index) {
1033
1060
1034
1061
@ Override
1035
1062
public int numLeaves () {
1036
- return leafBlockFPs . length ;
1063
+ return numLeaves ;
1037
1064
}
1038
1065
};
1039
1066
@@ -1903,7 +1930,8 @@ private void build(
1903
1930
int [] parentSplits ,
1904
1931
byte [] splitPackedValues ,
1905
1932
byte [] splitDimensionValues ,
1906
- long [] leafBlockFPs ,
1933
+ PackedLongValues .Builder leafBlockFPs ,
1934
+ int totalNumLeaves ,
1907
1935
int [] spareDocIds )
1908
1936
throws IOException {
1909
1937
@@ -1961,7 +1989,7 @@ private void build(
1961
1989
int leafCardinality = heapSource .computeCardinality (from , to , commonPrefixLengths );
1962
1990
1963
1991
// Save the block file pointer:
1964
- leafBlockFPs [ leavesOffset ] = out .getFilePointer ();
1992
+ leafBlockFPs . add ( out .getFilePointer () );
1965
1993
// System.out.println(" write leaf block @ fp=" + out.getFilePointer());
1966
1994
1967
1995
// Write docIDs first, as their own chunk, so that at intersect time we can add all docIDs w/o
@@ -2003,16 +2031,16 @@ assert valuesInOrderAndBounds(
2003
2031
// split dimensions. Because it is an expensive operation, the frequency we recompute the
2004
2032
// bounds is given
2005
2033
// by SPLITS_BEFORE_EXACT_BOUNDS.
2006
- if (numLeaves != leafBlockFPs . length
2034
+ if (numLeaves != totalNumLeaves
2007
2035
&& config .numIndexDims () > 2
2008
2036
&& Arrays .stream (parentSplits ).sum () % SPLITS_BEFORE_EXACT_BOUNDS == 0 ) {
2009
2037
computePackedValueBounds (points , minPackedValue , maxPackedValue );
2010
2038
}
2011
2039
splitDim = split (minPackedValue , maxPackedValue , parentSplits );
2012
2040
}
2013
2041
2014
- assert numLeaves <= leafBlockFPs . length
2015
- : "numLeaves=" + numLeaves + " leafBlockFPs.length =" + leafBlockFPs . length ;
2042
+ assert numLeaves <= totalNumLeaves
2043
+ : "numLeaves=" + numLeaves + " totalNumLeaves =" + totalNumLeaves ;
2016
2044
2017
2045
// How many leaves will be in the left tree:
2018
2046
final int numLeftLeafNodes = getNumLeftLeafNodes (numLeaves );
@@ -2078,6 +2106,7 @@ assert valuesInOrderAndBounds(
2078
2106
splitPackedValues ,
2079
2107
splitDimensionValues ,
2080
2108
leafBlockFPs ,
2109
+ totalNumLeaves ,
2081
2110
spareDocIds );
2082
2111
2083
2112
// Recurse on right tree:
@@ -2093,6 +2122,7 @@ assert valuesInOrderAndBounds(
2093
2122
splitPackedValues ,
2094
2123
splitDimensionValues ,
2095
2124
leafBlockFPs ,
2125
+ totalNumLeaves ,
2096
2126
spareDocIds );
2097
2127
2098
2128
parentSplits [splitDim ]--;
0 commit comments