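Simplify batch APIs: `PostingsEnum#nextPostings` and `Scorer#nextDocsAndScores` (formerly `nextScores`) now fill the caller-provided buffer and return void.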

jpountz · jpountz · commit 5b2bb4d5fac3 · 2025-05-20T21:54:41.000+02:00
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene103/Lucene103PostingsReader.java
@@ -1036,92 +1036,47 @@ public void intoBitSet(int upTo, FixedBitSet bitSet, int offset) throws IOExcept
     }
 
     @Override
-    public DocAndFreqBuffer nextPostings(int upTo, DocAndFreqBuffer reuse) throws IOException {
+    public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
+      assert needsRefilling == false;
+
       if (needsFreq == false) {
-        return super.nextPostings(upTo, reuse);
+        super.nextPostings(upTo, buffer);
+        return;
       }
-      assert needsRefilling == false;
 
+      buffer.size = 0;
       if (doc >= upTo) {
-        reuse.size = 0;
-        return reuse;
+        return;
       }
 
       // Only return docs from the current block
-      reuse.grow(BLOCK_SIZE);
+      buffer.growNoCopy(BLOCK_SIZE);
       upTo = (int) Math.min(upTo, level0LastDocID + 1L);
 
       // Frequencies are decoded lazily, calling freq() makes sure that the freq block is decoded
       freq();
 
-      int start, size;
-
+      int start = docBufferUpto - 1;
+      buffer.size = 0;
       switch (encoding) {
         case PACKED:
-          start = docBufferUpto - 1;
           int end = computeBufferEndBoundary(upTo);
-          size = end - start;
-          System.arraycopy(docBuffer, start, reuse.docs, 0, size);
+          buffer.size = end - start;
+          System.arraycopy(docBuffer, start, buffer.docs, 0, buffer.size);
           break;
         case UNARY:
-          start = docBufferUpto - 1;
-          if (upTo > level0LastDocID) {
-            assert upTo == level0LastDocID + 1;
-            end = BLOCK_SIZE;
-          } else {
-            int numBits = upTo - docBitSetBase;
-            int lastWordIndex = numBits >> 6;
-            end =
-                docCumulativeWordPopCounts[lastWordIndex]
-                    - Long.bitCount(docBitSet.getBits()[lastWordIndex] >>> numBits);
-          }
-          size = end - start;
-
-          int firstWordIndex = (doc - docBitSetBase) >> 6;
-          int lastWordIndex = (upTo - 1 - docBitSetBase) >> 6;
-
-          int size2 =
-              enumerateSetBits(
-                  docBitSet.getBits()[firstWordIndex], firstWordIndex << 6, reuse.docs, 0);
-          // Remove docs from the first word that are before the current doc
-          int numDocsBeforeCurrentDoc = size2;
-          for (int i = 0; i < size2; ++i) {
-            if (reuse.docs[i] >= doc - docBitSetBase) {
-              numDocsBeforeCurrentDoc = i;
-              break;
-            }
-          }
-          size2 -= numDocsBeforeCurrentDoc;
-          System.arraycopy(reuse.docs, numDocsBeforeCurrentDoc, reuse.docs, 0, size2);
-
-          for (int i = firstWordIndex + 1; i <= lastWordIndex; ++i) {
-            size2 = enumerateSetBits(docBitSet.getBits()[i], i << 6, reuse.docs, size2);
-          }
-          assert size2 >= size : size2 + " < " + size;
-          for (int i = 0; i < size; ++i) {
-            reuse.docs[i] += docBitSetBase;
-          }
+          docBitSet.forEach(
+              doc - docBitSetBase,
+              upTo - docBitSetBase,
+              docBitSetBase,
+              d -> buffer.docs[buffer.size++] = d);
           break;
-        default:
-          throw new AssertionError();
       }
 
-      assert size > 0;
-      System.arraycopy(freqBuffer, start, reuse.freqs, 0, size);
-      reuse.size = size;
+      assert buffer.size > 0;
+      System.arraycopy(freqBuffer, start, buffer.freqs, 0, buffer.size);
 
       advance(upTo);
-
-      return reuse;
-    }
-
-    private static int enumerateSetBits(long word, int base, int[] dest, int offset) {
-      while (word != 0L) {
-        int ntz = Long.numberOfTrailingZeros(word);
-        dest[offset++] = base + ntz;
-        word ^= 1L << ntz;
-      }
-      return offset;
     }
 
     private int computeBufferEndBoundary(int upTo) {
diff --git a/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java b/lucene/core/src/java/org/apache/lucene/index/CompositeReader.java
@@ -44,7 +44,7 @@
  * synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
  * your own (non-Lucene) objects instead.
  */
-public abstract non-sealed class CompositeReader extends IndexReader {
+public abstract class CompositeReader extends IndexReader {
 
   private volatile CompositeReaderContext readerContext = null; // lazy init
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexReader.java b/lucene/core/src/java/org/apache/lucene/index/IndexReader.java
@@ -63,7 +63,7 @@
  * synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
  * your own (non-Lucene) objects instead.
  */
-public abstract sealed class IndexReader implements Closeable permits CompositeReader, LeafReader {
+public abstract class IndexReader implements Closeable {
 
   private boolean closed = false;
   private boolean closedByChild = false;
diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
@@ -44,7 +44,7 @@
  * synchronization, you should <b>not</b> synchronize on the <code>IndexReader</code> instance; use
  * your own (non-Lucene) objects instead.
  */
-public abstract non-sealed class LeafReader extends IndexReader {
+public abstract class LeafReader extends IndexReader {
 
   private final LeafReaderContext readerContext = new LeafReaderContext(this);
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java b/lucene/core/src/java/org/apache/lucene/index/PostingsEnum.java
@@ -100,42 +100,44 @@ protected PostingsEnum() {}
   public abstract BytesRef getPayload() throws IOException;
 
   /**
-   * Return a new batch of doc IDs and frequencies, starting at the current doc ID, and ending
-   * before {@code upTo}.
+   * Fill a buffer of doc IDs and frequencies with some number of doc IDs and their corresponding
+   * frequencies, starting at the current doc ID, and ending before {@code upTo}.
    *
-   * <p>An empty return value indicates that there are no postings left between the current doc ID
-   * and {@code upTo}.
+   * <p>An empty buffer after this method returns indicates that there are no postings left between
+   * the current doc ID and {@code upTo}.
    *
-   * <p>This method behaves as if implemented as below, which is the default implementation:
+   * <p>Implementations should ideally fill the buffer with between 8 and a couple hundred entries,
+   * to keep heap requirements contained, while still being large enough to enable operations on the
+   * buffer to auto-vectorize efficiently.
+   *
+   * <p>The default implementation is provided below:
    *
    * <pre class="prettyprint">
-   * int batchSize = 16;
-   * reuse.grow(batchSize);
+   * int batchSize = 16; // arbitrary
+   * buffer.growNoCopy(batchSize);
    * int size = 0;
    * for (int doc = docID(); doc &lt; upTo &amp;&amp; size &lt; batchSize; doc = nextDoc()) {
-   *   reuse.docs[size] = doc;
-   *   reuse.freqs[size] = freq();
+   *   buffer.docs[size] = doc;
+   *   buffer.freqs[size] = freq();
    *   ++size;
    * }
-   * reuse.size = size;
-   * return reuse;
+   * buffer.size = size;
    * </pre>
    *
-   * <p><b>NOTE</b>: The returned {@link DocAndFreqBuffer} should not hold references to internal
+   * <p><b>NOTE</b>: The provided {@link DocAndFreqBuffer} should not hold references to internal
    * data structures.
    *
    * @lucene.internal
    */
-  public DocAndFreqBuffer nextPostings(int upTo, DocAndFreqBuffer reuse) throws IOException {
-    int batchSize = 16;
-    reuse.grow(batchSize);
+  public void nextPostings(int upTo, DocAndFreqBuffer buffer) throws IOException {
+    int batchSize = 16; // arbitrary
+    buffer.growNoCopy(batchSize);
     int size = 0;
     for (int doc = docID(); doc < upTo && size < batchSize; doc = nextDoc()) {
-      reuse.docs[size] = doc;
-      reuse.freqs[size] = freq();
+      buffer.docs[size] = doc;
+      buffer.freqs[size] = freq();
       ++size;
     }
-    reuse.size = size;
-    return reuse;
+    buffer.size = size;
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java b/lucene/core/src/java/org/apache/lucene/search/BooleanScorer.java
@@ -139,23 +139,32 @@ private void scoreWindowIntoBitSetAndReplay(
       if (w.doc < min) {
         it.advance(min);
       }
-      if (buckets == null) {
+      if (buckets == null) { // means minShouldMatch=1 and scores are not needed
         // This doesn't apply live docs, so we'll need to apply them later
         it.intoBitSet(max, matching, base);
+      } else if (needsScores) {
+        for (w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer);
+            docAndScoreBuffer.size > 0;
+            w.scorer.nextDocsAndScores(max, acceptDocs, docAndScoreBuffer)) {
+          for (int index = 0; index < docAndScoreBuffer.size; ++index) {
+            final int doc = docAndScoreBuffer.docs[index];
+            final float score = docAndScoreBuffer.scores[index];
+            final int d = doc & MASK;
+            matching.set(d);
+            final Bucket bucket = buckets[d];
+            bucket.freq++;
+            bucket.score += score;
+          }
+        }
       } else {
-        for (DocAndScoreBuffer buffer = w.scorer.nextScores(max, acceptDocs, docAndScoreBuffer);
-            buffer.size > 0;
-            buffer = w.scorer.nextScores(max, acceptDocs, docAndScoreBuffer)) {
-          for (int index = 0; index < buffer.size; ++index) {
-            final int doc = buffer.docs[index];
-            final float score = buffer.scores[index];
+        // Scores are not needed but we need to keep track of freqs to know which hits match
+        assert minShouldMatch > 1;
+        for (int doc = it.docID(); doc < max; doc = it.nextDoc()) {
+          if (acceptDocs == null || acceptDocs.get(doc)) {
             final int d = doc & MASK;
             matching.set(d);
             final Bucket bucket = buckets[d];
             bucket.freq++;
-            if (needsScores) {
-              bucket.score += score;
-            }
           }
         }
       }
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocAndFreqBuffer.java b/lucene/core/src/java/org/apache/lucene/search/DocAndFreqBuffer.java
@@ -40,10 +40,10 @@ public final class DocAndFreqBuffer {
   public DocAndFreqBuffer() {}
 
   /** Grow both arrays to ensure that they can store at least the given number of entries. */
-  public void grow(int minSize) {
+  public void growNoCopy(int minSize) {
     if (docs.length < minSize) {
-      docs = ArrayUtil.grow(docs, minSize);
-      freqs = ArrayUtil.growExact(freqs, docs.length);
+      docs = ArrayUtil.growNoCopy(docs, minSize);
+      freqs = new int[docs.length];
     }
   }
 
diff --git a/lucene/core/src/java/org/apache/lucene/search/DocAndScoreBuffer.java b/lucene/core/src/java/org/apache/lucene/search/DocAndScoreBuffer.java
@@ -41,10 +41,10 @@ public final class DocAndScoreBuffer {
   public DocAndScoreBuffer() {}
 
   /** Grow both arrays to ensure that they can store at least the given number of entries. */
-  public void grow(int minSize) {
+  public void growNoCopy(int minSize) {
     if (docs.length < minSize) {
-      docs = ArrayUtil.grow(docs, minSize);
-      scores = ArrayUtil.growExact(scores, docs.length);
+      docs = ArrayUtil.growNoCopy(docs, minSize);
+      scores = new float[docs.length];
     }
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java b/lucene/core/src/java/org/apache/lucene/search/MaxScoreBulkScorer.java
@@ -220,11 +220,15 @@ private void scoreInnerWindowSingleEssentialClause(
 
     // single essential clause in this window, we can iterate it directly and skip the bitset.
     // this is a common case for 2-clauses queries
-    for (DocAndScoreBuffer buffer = top.scorer.nextScores(upTo, acceptDocs, docAndScoreBuffer);
-        buffer.size > 0;
-        buffer = top.scorer.nextScores(upTo, acceptDocs, docAndScoreBuffer)) {
-      for (int i = 0; i < buffer.size; ++i) {
-        scoreNonEssentialClauses(collector, buffer.docs[i], buffer.scores[i], firstEssentialScorer);
+    for (top.scorer.nextDocsAndScores(upTo, acceptDocs, docAndScoreBuffer);
+        docAndScoreBuffer.size > 0;
+        top.scorer.nextDocsAndScores(upTo, acceptDocs, docAndScoreBuffer)) {
+      for (int i = 0; i < docAndScoreBuffer.size; ++i) {
+        scoreNonEssentialClauses(
+            collector,
+            docAndScoreBuffer.docs[i],
+            docAndScoreBuffer.scores[i],
+            firstEssentialScorer);
       }
     }
 
@@ -308,13 +312,12 @@ private void scoreInnerWindowMultipleEssentialClauses(
 
     // Collect matches of essential clauses into a bitset
     do {
-      for (DocAndScoreBuffer buffer =
-              top.scorer.nextScores(innerWindowMax, acceptDocs, docAndScoreBuffer);
-          buffer.size > 0;
-          buffer = top.scorer.nextScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
-        for (int index = 0; index < buffer.size; ++index) {
-          final int doc = buffer.docs[index];
-          final float score = buffer.scores[index];
+      for (top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer);
+          docAndScoreBuffer.size > 0;
+          top.scorer.nextDocsAndScores(innerWindowMax, acceptDocs, docAndScoreBuffer)) {
+        for (int index = 0; index < docAndScoreBuffer.size; ++index) {
+          final int doc = docAndScoreBuffer.docs[index];
+          final float score = docAndScoreBuffer.scores[index];
           final int i = doc - innerWindowMin;
           windowMatches[i >>> 6] |= 1L << i;
           windowScores[i] += score;
diff --git a/lucene/core/src/java/org/apache/lucene/search/Scorer.java b/lucene/core/src/java/org/apache/lucene/search/Scorer.java
@@ -85,46 +85,48 @@ public int advanceShallow(int target) throws IOException {
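-   * <p>An empty return value indicates that there are no postings left between the current doc ID
-   * and {@code upTo}.
+   * <p>An empty buffer after this method returns indicates that there are no postings left between
+   * the current doc ID and {@code upTo}.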
    *
-   * <p>This method behaves as if implemented as below, which is the default implementation:
+   * <p>Implementations should ideally fill the buffer with between 8 and a couple hundred entries,
+   * to keep heap requirements contained, while still being large enough to enable operations on the
+   * buffer to auto-vectorize efficiently.
+   *
+   * <p>The default implementation is provided below:
    *
    * <pre class="prettyprint">
-   * int batchSize = 16;
-   * reuse.grow(batchSize);
+   * int batchSize = 16; // arbitrary
+   * buffer.growNoCopy(batchSize);
    * int size = 0;
    * DocIdSetIterator iterator = iterator();
    * for (int doc = docID(); doc &lt; upTo &amp;&amp; size &lt; batchSize; doc = iterator.nextDoc()) {
    *   if (liveDocs == null || liveDocs.get(doc)) {
-   *     reuse.docs[size] = doc;
-   *     reuse.scores[size] = score();
+   *     buffer.docs[size] = doc;
+   *     buffer.scores[size] = score();
    *     ++size;
    *   }
    * }
-   * reuse.size = size;
+   * buffer.size = size;
-   * return reuse;
    * </pre>
    *
-   * <p><b>NOTE</b>: The returned {@link DocAndScoreBuffer} should not hold references to internal
+   * <p><b>NOTE</b>: The provided {@link DocAndScoreBuffer} should not hold references to internal
    * data structures.
    *
    * <p><b>NOTE</b>: In case this {@link Scorer} exposes a {@link #twoPhaseIterator()
    * TwoPhaseIterator}, it should be positioned on a matching document before this method is called.
    *
    * @lucene.internal
    */
-  public DocAndScoreBuffer nextScores(int upTo, Bits liveDocs, DocAndScoreBuffer reuse)
+  public void nextDocsAndScores(int upTo, Bits liveDocs, DocAndScoreBuffer buffer)
       throws IOException {
-    int batchSize = 16;
-    reuse.grow(batchSize);
+    int batchSize = 16; // arbitrary
+    buffer.growNoCopy(batchSize);
     int size = 0;
     DocIdSetIterator iterator = iterator();
     for (int doc = docID(); doc < upTo && size < batchSize; doc = iterator.nextDoc()) {
       if (liveDocs == null || liveDocs.get(doc)) {
-        reuse.docs[size] = doc;
-        reuse.scores[size] = score();
+        buffer.docs[size] = doc;
+        buffer.scores[size] = score();
         ++size;
       }
     }
-    reuse.size = size;
-    return reuse;
+    buffer.size = size;
   }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/search/TermScorer.java b/lucene/core/src/java/org/apache/lucene/search/TermScorer.java
diff --git a/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java b/lucene/core/src/java/org/apache/lucene/search/similarities/Similarity.java
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/AssertingLeafReader.java
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorer.java b/lucene/test-framework/src/java/org/apache/lucene/tests/search/AssertingScorer.java

Original file line number	Diff line number	Diff line change
`@@ -40,10 +40,10 @@ public final class DocAndFreqBuffer {`
`40`	`40`	`public DocAndFreqBuffer() {}`
`41`	`41`
`42`	`42`	`/** Grow both arrays to ensure that they can store at least the given number of entries. */`
`43`		`- public void grow(int minSize) {`
	`43`	`+ public void growNoCopy(int minSize) {`
`44`	`44`	`if (docs.length < minSize) {`
`45`		`- docs = ArrayUtil.grow(docs, minSize);`
`46`		`- freqs = ArrayUtil.growExact(freqs, docs.length);`
	`45`	`+ docs = ArrayUtil.growNoCopy(docs, minSize);`
	`46`	`+ freqs = new int[docs.length];`
`47`	`47`	`}`
`48`	`48`	`}`
`49`	`49`
Original file line number	Diff line number	Diff line change
`@@ -41,10 +41,10 @@ public final class DocAndScoreBuffer {`
`41`	`41`	`public DocAndScoreBuffer() {}`
`42`	`42`
`43`	`43`	`/** Grow both arrays to ensure that they can store at least the given number of entries. */`
`44`		`- public void grow(int minSize) {`
	`44`	`+ public void growNoCopy(int minSize) {`
`45`	`45`	`if (docs.length < minSize) {`
`46`		`- docs = ArrayUtil.grow(docs, minSize);`
`47`		`- scores = ArrayUtil.growExact(scores, docs.length);`
	`46`	`+ docs = ArrayUtil.growNoCopy(docs, minSize);`
	`47`	`+ scores = new float[docs.length];`
`48`	`48`	`}`
`49`	`49`	`}`
`50`	`50`	`}`