diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 867f53b1e06c..645506f4c642 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -59,6 +59,8 @@ API Changes
 * GITHUB#14401: Added LeafCollector#collectRange to enable collector to take
   advantage of pre-aggregated data to speed up faceting. (Adrien Grand)
 
+* GITHUB#14417: Support modifying segmentInfos.counter in IndexWriter (Jialiang Guo)
+
 New Features
 ---------------------
 (No changes)
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index 479fc7901192..772b35fad94b 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1427,6 +1427,25 @@ public synchronized void advanceSegmentInfosVersion(long newVersion) {
     changed();
   }
 
+  /**
+   * If {@link SegmentInfos#counter} is below {@code newCounter} then update it to this value.
+   *
+   * @lucene.internal
+   */
+  public synchronized void advanceSegmentInfosCounter(long newCounter) {
+    this.ensureOpen();
+    if (segmentInfos.counter < newCounter) {
+      segmentInfos.counter = newCounter;
+    }
+    changed();
+  }
+
+  /** Returns the {@link SegmentInfos#counter}, reading it under this writer's lock. */
+  public synchronized long getSegmentInfosCounter() {
+    this.ensureOpen();
+    return segmentInfos.counter;
+  }
+
   /**
    * Returns true if this index has deletions (including buffered deletions). Note that this will
    * return true if there are buffered Term/Query deletions, even if it turns out those buffered
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index 0943a02022df..e64901080712 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -5037,4 +5037,88 @@ public void testDocValuesSkippingIndexWithoutDocValues() throws Exception {
       }
     }
   }
+
+  public void testAdvanceSegmentInfosCounter() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer;
+    IndexReader reader;
+    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    // add 10 documents
+    for (int i = 0; i < 10; i++) {
+      addDocWithIndex(writer, i);
+      writer.commit();
+    }
+    writer.advanceSegmentInfosCounter(1);
+    assertTrue(writer.getSegmentInfosCounter() >= 1);
+
+    writer.advanceSegmentInfosCounter(1000);
+    // add 40 documents
+    for (int i = 10; i < 50; i++) {
+      addDocWithIndex(writer, i);
+      writer.commit();
+    }
+
+    // Merges may be running in the background, so this only verifies that the current segment
+    // counter is at least 1000.
+    assertTrue(writer.getSegmentInfosCounter() >= 1000);
+
+    IndexWriter.DocStats docStats = writer.getDocStats();
+    assertEquals(50, docStats.maxDoc);
+    assertEquals(50, docStats.numDocs);
+    writer.close();
+
+    // check that the index reader gives the same numbers.
+    reader = DirectoryReader.open(dir);
+    assertEquals(50, reader.maxDoc());
+    assertEquals(50, reader.numDocs());
+    reader.close();
+    dir.close();
+  }
+
+  public void testAdvanceSegmentCounterInCrashAndRecoveryScenario() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter writer;
+    IndexReader reader;
+    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+
+    // add 100 documents
+    for (int i = 0; i < 100; i++) {
+      addDocWithIndex(writer, i);
+      if (random().nextBoolean()) {
+        writer.commit();
+      }
+    }
+    IndexWriter.DocStats docStats = writer.getDocStats();
+    assertEquals(100, docStats.maxDoc);
+    assertEquals(100, docStats.numDocs);
+    writer.commit();
+    writer.close();
+
+    // recovery and advance segment counter
+    writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())));
+    assertEquals(100, writer.getDocStats().numDocs);
+    long newSegmentCounter = writer.getSegmentInfosCounter() + 1000;
+    writer.advanceSegmentInfosCounter(newSegmentCounter);
+
+    // add 10 documents
+    for (int i = 0; i < 10; i++) {
+      addDocWithIndex(writer, i);
+      if (random().nextBoolean()) {
+        writer.commit();
+      }
+    }
+
+    assertTrue(writer.getSegmentInfosCounter() >= newSegmentCounter);
+
+    assertEquals(110, writer.getDocStats().numDocs);
+    // check that the index reader gives the same numbers.
+    writer.commit();
+    reader = DirectoryReader.open(dir);
+    assertEquals(110, reader.maxDoc());
+    assertEquals(110, reader.numDocs());
+    reader.close();
+    writer.close();
+    dir.close();
+  }
 }
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java
index 44c72caa3978..9f13fd2a8634 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java
@@ -82,6 +82,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
   protected final AtomicInteger addCount = new AtomicInteger();
   protected final AtomicInteger delCount = new AtomicInteger();
   protected final AtomicInteger packCount = new AtomicInteger();
+  protected final AtomicLong maxAdvancedSegmentCounter = new AtomicLong(0);
 
   protected Directory dir;
   protected IndexWriter writer;
@@ -189,6 +190,20 @@ public void run() {
                     addedField = null;
                   }
 
+                  // Maybe advance segment counter. Run with a relatively low probability to avoid
+                  // testing slowdowns caused by synchronous operations.
+                  if (random().nextInt(7) == 5) {
+                    long newSegmentCounter = writer.getSegmentInfosCounter() + 100;
+                    writer.advanceSegmentInfosCounter(newSegmentCounter);
+                    if (VERBOSE) {
+                      System.out.println(
+                          Thread.currentThread().getName()
+                              + " advance segment counter to "
+                              + newSegmentCounter);
+                    }
+                    maxAdvancedSegmentCounter.accumulateAndGet(newSegmentCounter, Math::max);
+                  }
+
                   if (random().nextBoolean()) {
 
                     if (random().nextBoolean()) {
@@ -606,6 +621,8 @@ public void message(String component, String message) {
       thread.join();
     }
 
+    assertTrue(writer.getSegmentInfosCounter() >= maxAdvancedSegmentCounter.get());
+
     if (VERBOSE) {
       System.out.println(
           "TEST: done join indexing threads ["
@@ -613,7 +630,9 @@ public void message(String component, String message) {
               + " ms]; addCount="
               + addCount
               + " delCount="
-              + delCount);
+              + delCount
+              + " segmentCounter="
+              + writer.getSegmentInfosCounter());
     }
 
     final IndexSearcher s = getFinalSearcher();