Skip to content

Commit 5b2bb4d

Browse files
committed
Simplify.
1 parent af1e18c commit 5b2bb4d

File tree

14 files changed

+113
-162
lines changed

14 files changed

+113
-162
lines changed

lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java

Lines changed: 19 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1036,92 +1036,47 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
10361036
}
10371037

10381038
@Override
1039-
public DocAndFreqBuffer nextPostings(int upTo, DocAndFreqBuffer reuse) throws IOException {
1039+
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
1040+
assert needsRefilling == false;
1041+
10401042
if (needsFreq == false) {
1041-
return super.nextPostings(upTo, reuse);
1043+
super.nextPostings(upTo, buffer);
1044+
return;
10421045
}
1043-
assert needsRefilling == false;
10441046

1047+
buffer.size = 0;
10451048
if (doc >= upTo) {
1046-
reuse.size = 0;
1047-
return reuse;
1049+
return;
10481050
}
10491051

10501052
// Only return docs from the current block
1051-
reuse.grow(BLOCK_SIZE);
1053+
buffer.growNoCopy(BLOCK_SIZE);
10521054
upTo = (int) Math.min(upTo, level0LastDocID + 1L);
10531055

10541056
// Frequencies are decoded lazily, calling freq() makes sure that the freq block is decoded
10551057
freq();
10561058

1057-
int start, size;
1058-
1059+
int start = docBufferUpto - 1;
1060+
buffer.size = 0;
10591061
switch (encoding) {
10601062
case PACKED:
1061-
start = docBufferUpto - 1;
10621063
int end = computeBufferEndBoundary(upTo);
1063-
size = end - start;
1064-
System.arraycopy(docBuffer, start, reuse.docs, 0, size);
1064+
buffer.size = end - start;
1065+
System.arraycopy(docBuffer, start, buffer.docs, 0, buffer.size);
10651066
break;
10661067
case UNARY:
1067-
start = docBufferUpto - 1;
1068-
if (upTo > level0LastDocID) {
1069-
assert upTo == level0LastDocID + 1;
1070-
end = BLOCK_SIZE;
1071-
} else {
1072-
int numBits = upTo - docBitSetBase;
1073-
int lastWordIndex = numBits >> 6;
1074-
end =
1075-
docCumulativeWordPopCounts[lastWordIndex]
1076-
- Long.bitCount(docBitSet.getBits()[lastWordIndex] >>> numBits);
1077-
}
1078-
size = end - start;
1079-
1080-
int firstWordIndex = (doc - docBitSetBase) >> 6;
1081-
int lastWordIndex = (upTo - 1 - docBitSetBase) >> 6;
1082-
1083-
int size2 =
1084-
enumerateSetBits(
1085-
docBitSet.getBits()[firstWordIndex], firstWordIndex << 6, reuse.docs, 0);
1086-
// Remove docs from the first word that are before the current doc
1087-
int numDocsBeforeCurrentDoc = size2;
1088-
for (int i = 0; i < size2; ++i) {
1089-
if (reuse.docs[i] >= doc - docBitSetBase) {
1090-
numDocsBeforeCurrentDoc = i;
1091-
break;
1092-
}
1093-
}
1094-
size2 -= numDocsBeforeCurrentDoc;
1095-
System.arraycopy(reuse.docs, numDocsBeforeCurrentDoc, reuse.docs, 0, size2);
1096-
1097-
for (int i = firstWordIndex + 1; i <= lastWordIndex; ++i) {
1098-
size2 = enumerateSetBits(docBitSet.getBits()[i], i << 6, reuse.docs, size2);
1099-
}
1100-
assert size2 >= size : size2 + " < " + size;
1101-
for (int i = 0; i < size; ++i) {
1102-
reuse.docs[i] += docBitSetBase;
1103-
}
1068+
docBitSet.forEach(
1069+
doc - docBitSetBase,
1070+
upTo - docBitSetBase,
1071+
docBitSetBase,
1072+
d -> buffer.docs[buffer.size++] = d);
11041073
break;
1105-
default:
1106-
throw new AssertionError();
11071074
}
11081075

1109-
assert size > 0;
1110-
System.arraycopy(freqBuffer, start, reuse.freqs, 0, size);
1111-
reuse.size = size;
1076+
assert buffer.size > 0;
1077+
System.arraycopy(freqBuffer, start, buffer.freqs, 0, buffer.size);
11121078

11131079
advance(upTo);
1114-
1115-
return reuse;
1116-
}
1117-
1118-
private static int enumerateSetBits(long word, int base, int[] dest, int offset) {
1119-
while (word != 0L) {
1120-
int ntz = Long.numberOfTrailingZeros(word);
1121-
dest[offset++] = base + ntz;
1122-
word ^= 1L << ntz;
1123-
}
1124-
return offset;
11251080
}
11261081

11271082
private int computeBufferEndBoundary(int upTo) {

lucene/core/src/java/org/apache/lucene/index/CompositeReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
* synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
4545
* your own (non-Lucene) objects instead.
4646
*/
47-
public abstract non-sealed class CompositeReader extends IndexReader {
47+
public abstract class CompositeReader extends IndexReader {
4848

4949
private volatile CompositeReaderContext readerContext = null; // lazy init
5050

lucene/core/src/java/org/apache/lucene/index/IndexReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
* synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
6464
* your own (non-Lucene) objects instead.
6565
*/
66-
public abstract sealed class IndexReader implements Closeable permits CompositeReader, LeafReader {
66+
public abstract class IndexReader implements Closeable {
6767

6868
private boolean closed = false;
6969
private boolean closedByChild = false;

lucene/core/src/java/org/apache/lucene/index/LeafReader.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
* synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
4545
* your own (non-Lucene) objects instead.
4646
*/
47-
public abstract non-sealed class LeafReader extends IndexReader {
47+
public abstract class LeafReader extends IndexReader {
4848

4949
private final LeafReaderContext readerContext = new LeafReaderContext(this);
5050

lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -100,42 +100,44 @@ protected PostingsEnum() {}
100100
public abstract BytesRef getPayload() throws IOException;
101101

102102
/**
103-
* Return a new batch of doc IDs and frequencies, starting at the current doc ID, and ending
104-
* before {@code upTo}.
103+
* Fill a buffer of doc IDs and frequencies with some number of doc IDs and their corresponding
104+
* frequencies, starting at the current doc ID, and ending before {@code upTo}.
105105
*
106-
* <p>An empty return value indicates that there are no postings left between the current doc ID
107-
* and {@code upTo}.
106+
* <p>An empty buffer after this method returns indicates that there are no postings left between
107+
* the current doc ID and {@code upTo}.
108108
*
109-
* <p>This method behaves as if implemented as below, which is the default implementation:
109+
* <p>Implementations should ideally fill the buffer with a number of entries between 8
110+
* and a couple hundred, to keep heap requirements contained, while still being large enough to
111+
* enable operations on the buffer to auto-vectorize efficiently.
112+
*
113+
* <p>The default implementation is provided below:
110114
*
111115
* <pre class="prettyprint">
112-
* int batchSize = 16;
113-
* reuse.grow(batchSize);
116+
* int batchSize = 16; // arbitrary
117+
* buffer.growNoCopy(batchSize);
114118
* int size = 0;
115119
* for (int doc = docID(); doc &lt; upTo &amp;&amp; size &lt; batchSize; doc = nextDoc()) {
116-
* reuse.docs[size] = doc;
117-
* reuse.freqs[size] = freq();
120+
* buffer.docs[size] = doc;
121+
* buffer.freqs[size] = freq();
118122
* ++size;
119123
* }
120-
* reuse.size = size;
121-
* return reuse;
124+
* buffer.size = size;
122125
* </pre>
123126
*
124-
* <p><b>NOTE</b>: The returned {@link DocAndFreqBuffer} should not hold references to internal
127+
* <p><b>NOTE</b>: The provided {@link DocAndFreqBuffer} should not hold references to internal
125128
* data structures.
126129
*
127130
* @lucene.internal
128131
*/
129-
public DocAndFreqBuffer nextPostings(int upTo, DocAndFreqBuffer reuse) throws IOException {
130-
int batchSize = 16;
131-
reuse.grow(batchSize);
132+
public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
133+
int batchSize = 16; // arbitrary
134+
buffer.growNoCopy(batchSize);
132135
int size = 0;
133136
for (int doc = docID(); doc < upTo && size < batchSize; doc = nextDoc()) {
134-
reuse.docs[size] = doc;
135-
reuse.freqs[size] = freq();
137+
buffer.docs[size] = doc;
138+
buffer.freqs[size] = freq();
136139
++size;
137140
}
138-
reuse.size = size;
139-
return reuse;
141+
buffer.size = size;
140142
}
141143
}

lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -139,23 +139,32 @@ private void scoreWindowIntoBitSetAndReplay(
139139
if (w.doc < min) {
140140
it.advance(min);
141141
}
142-
if (buckets == null) {
142+
if (buckets == null) { // means minShouldMatch=1 and scores are not needed
143143
// This doesn't apply live docs, so we'll need to apply them later
144144
it.intoBitSet(max, matching, base);
145+
} else if (needsScores) {
146+
for (w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer);
147+
docAndScoreBuffer.size > 0;
148+
w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
149+
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
150+
final int doc = docAndScoreBuffer.docs[index];
151+
final float score = docAndScoreBuffer.scores[index];
152+
final int d = doc & MASK;
153+
matching.set(d);
154+
final Bucket bucket = buckets[d];
155+
bucket.freq++;
156+
bucket.score += score;
157+
}
158+
}
145159
} else {
146-
for (DocAndScoreBuffer buffer = w.scorer.nextScores(max, acceptDocs, docAndScoreBuffer);
147-
buffer.size > 0;
148-
buffer = w.scorer.nextScores(max, acceptDocs, docAndScoreBuffer)) {
149-
for (int index = 0; index < buffer.size; ++index) {
150-
final int doc = buffer.docs[index];
151-
final float score = buffer.scores[index];
160+
// Scores are not needed but we need to keep track of freqs to know which hits match
161+
assert minShouldMatch > 1;
162+
for (int doc = it.docID(); doc < max; doc = it.nextDoc()) {
163+
if (acceptDocs == null || acceptDocs.get(doc)) {
152164
final int d = doc & MASK;
153165
matching.set(d);
154166
final Bucket bucket = buckets[d];
155167
bucket.freq++;
156-
if (needsScores) {
157-
bucket.score += score;
158-
}
159168
}
160169
}
161170
}

lucene/core/src/java/org/apache/lucene/search/DocAndFreqBuffer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,10 +40,10 @@ public final class DocAndFreqBuffer {
4040
public DocAndFreqBuffer() {}
4141

4242
/** Grow both arrays to ensure that they can store at least the given number of entries. Existing entries are not preserved when the arrays are reallocated. */
43-
public void grow(int minSize) {
43+
public void growNoCopy(int minSize) {
4444
if (docs.length < minSize) {
45-
docs = ArrayUtil.grow(docs, minSize);
46-
freqs = ArrayUtil.growExact(freqs, docs.length);
45+
docs = ArrayUtil.growNoCopy(docs, minSize);
46+
freqs = new int[docs.length];
4747
}
4848
}
4949

lucene/core/src/java/org/apache/lucene/search/DocAndScoreBuffer.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,10 @@ public final class DocAndScoreBuffer {
4141
public DocAndScoreBuffer() {}
4242

4343
/** Grow both arrays to ensure that they can store at least the given number of entries. Existing entries are not preserved when the arrays are reallocated. */
44-
public void grow(int minSize) {
44+
public void growNoCopy(int minSize) {
4545
if (docs.length < minSize) {
46-
docs = ArrayUtil.grow(docs, minSize);
47-
scores = ArrayUtil.growExact(scores, docs.length);
46+
docs = ArrayUtil.growNoCopy(docs, minSize);
47+
scores = new float[docs.length];
4848
}
4949
}
5050
}

lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -220,11 +220,15 @@ private void scoreInnerWindowSingleEssentialClause(
220220

221221
// single essential clause in this window, we can iterate it directly and skip the bitset.
222222
// this is a common case for 2-clauses queries
223-
for (DocAndScoreBuffer buffer = top.scorer.nextScores(upTo, acceptDocs, docAndScoreBuffer);
224-
buffer.size > 0;
225-
buffer = top.scorer.nextScores(upTo, acceptDocs, docAndScoreBuffer)) {
226-
for (int i = 0; i < buffer.size; ++i) {
227-
scoreNonEssentialClauses(collector, buffer.docs[i], buffer.scores[i], firstEssentialScorer);
223+
for (top.scorer.nextDocsAndScores(upTo, acceptDocs, docAndScoreBuffer);
224+
docAndScoreBuffer.size > 0;
225+
top.scorer.nextDocsAndScores(upTo, acceptDocs, docAndScoreBuffer)) {
226+
for (int i = 0; i < docAndScoreBuffer.size; ++i) {
227+
scoreNonEssentialClauses(
228+
collector,
229+
docAndScoreBuffer.docs[i],
230+
docAndScoreBuffer.scores[i],
231+
firstEssentialScorer);
228232
}
229233
}
230234

@@ -308,13 +312,12 @@ private void scoreInnerWindowMultipleEssentialClauses(
308312

309313
// Collect matches of essential clauses into a bitset
310314
do {
311-
for (DocAndScoreBuffer buffer =
312-
top.scorer.nextScores(innerWindowMax, acceptDocs, docAndScoreBuffer);
313-
buffer.size > 0;
314-
buffer = top.scorer.nextScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
315-
for (int index = 0; index < buffer.size; ++index) {
316-
final int doc = buffer.docs[index];
317-
final float score = buffer.scores[index];
315+
for (top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer);
316+
docAndScoreBuffer.size > 0;
317+
top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
318+
for (int index = 0; index < docAndScoreBuffer.size; ++index) {
319+
final int doc = docAndScoreBuffer.docs[index];
320+
final float score = docAndScoreBuffer.scores[index];
318321
final int i = doc - innerWindowMin;
319322
windowMatches[i >>> 6] |= 1L << i;
320323
windowScores[i] += score;

lucene/core/src/java/org/apache/lucene/search/Scorer.java

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -85,46 +85,48 @@ public int advanceShallow(int target) throws IOException {
8585
* <p>An empty buffer after this method returns indicates that there are no postings left between
8686
* the current doc ID and {@code upTo}.
8787
*
88-
* <p>This method behaves as if implemented as below, which is the default implementation:
88+
* <p>Implementations should ideally fill the buffer with a number of entries between 8
89+
* and a couple hundred, to keep heap requirements contained, while still being large enough to
90+
* enable operations on the buffer to auto-vectorize efficiently.
91+
*
92+
* <p>The default implementation is provided below:
8993
*
9094
* <pre class="prettyprint">
91-
* int batchSize = 16;
92-
* reuse.grow(batchSize);
95+
* int batchSize = 16; // arbitrary
96+
* buffer.growNoCopy(batchSize);
9397
* int size = 0;
9498
* DocIdSetIterator iterator = iterator();
9599
* for (int doc = docID(); doc &lt; upTo &amp;&amp; size &lt; batchSize; doc = iterator.nextDoc()) {
96100
* if (liveDocs == null || liveDocs.get(doc)) {
97-
* reuse.docs[size] = doc;
98-
* reuse.scores[size] = score();
101+
* buffer.docs[size] = doc;
102+
* buffer.scores[size] = score();
99103
* ++size;
100104
* }
101105
* }
102106
* buffer.size = size;
103-
* return reuse;
104107
* </pre>
105108
*
106-
* <p><b>NOTE</b>: The returned {@link DocAndScoreBuffer} should not hold references to internal
109+
* <p><b>NOTE</b>: The provided {@link DocAndScoreBuffer} should not hold references to internal
107110
* data structures.
108111
*
109112
* <p><b>NOTE</b>: In case this {@link Scorer} exposes a {@link #twoPhaseIterator()
110113
* TwoPhaseIterator}, it should be positioned on a matching document before this method is called.
111114
*
112115
* @lucene.internal
113116
*/
114-
public DocAndScoreBuffer nextScores(int upTo, Bits liveDocs, DocAndScoreBuffer reuse)
117+
public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer)
115118
throws IOException {
116-
int batchSize = 16;
117-
reuse.grow(batchSize);
119+
int batchSize = 16; // arbitrary
120+
buffer.growNoCopy(batchSize);
118121
int size = 0;
119122
DocIdSetIterator iterator = iterator();
120123
for (int doc = docID(); doc < upTo && size < batchSize; doc = iterator.nextDoc()) {
121124
if (liveDocs == null || liveDocs.get(doc)) {
122-
reuse.docs[size] = doc;
123-
reuse.scores[size] = score();
125+
buffer.docs[size] = doc;
126+
buffer.scores[size] = score();
124127
++size;
125128
}
126129
}
127-
reuse.size = size;
128-
return reuse;
130+
buffer.size = size;
129131
}
130132
}

0 commit comments

Comments
 (0)