From 055e2d58f722a277daf9a5214cb7134e4f74c56e Mon Sep 17 00:00:00 2001 From: guojialiang Date: Thu, 27 Mar 2025 14:47:20 +0800 Subject: [PATCH 1/7] IndexWriter support advance segmentInfos counter Signed-off-by: guojialiang --- .../org/apache/lucene/index/IndexWriter.java | 18 ++++++++ .../apache/lucene/index/TestIndexWriter.java | 41 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 479fc7901192..80b44f256403 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1427,6 +1427,24 @@ public synchronized void advanceSegmentInfosVersion(long newVersion) { changed(); } + /** + * If {@link SegmentInfos#counter} is below {@code newCounter} then update it to this value. + * + * @lucene.internal + */ + public synchronized void advanceSegmentInfosCounter(long newCounter) { + this.ensureOpen(); + if (segmentInfos.counter < newCounter) { + segmentInfos.counter = newCounter; + } + } + + /** Returns the {@link SegmentInfos#counter}. */ + public synchronized long getSegmentInfosCounter() { + this.ensureOpen(); + return segmentInfos.counter; + } + /** * Returns true if this index has deletions (including buffered deletions). 
Note that this will * return true if there are buffered Term/Query deletions, even if it turns out those buffered diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 0943a02022df..acae3327b845 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -5037,4 +5037,45 @@ public void testDocValuesSkippingIndexWithoutDocValues() throws Exception { } } } + + public void testAdvanceSegmentInfosCounter() throws IOException { + Directory dir = newDirectory(); + + IndexWriter writer; + IndexReader reader; + + writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + + // add 10 documents + for (int i = 0; i < 10; i++) { + addDocWithIndex(writer, i); + writer.commit(); + } + long beforeAdvanceSegmentCounter = writer.getSegmentInfosCounter(); + writer.advanceSegmentInfosCounter(1); + assertEquals(beforeAdvanceSegmentCounter, writer.getSegmentInfosCounter()); + + writer.advanceSegmentInfosCounter(1000); + assertEquals(1000, writer.getSegmentInfosCounter()); + + // add 40 documents + for (int i = 10; i < 50; i++) { + addDocWithIndex(writer, i); + writer.commit(); + } + + assertEquals(1041, writer.getSegmentInfosCounter()); + + IndexWriter.DocStats docStats = writer.getDocStats(); + assertEquals(50, docStats.maxDoc); + assertEquals(50, docStats.numDocs); + writer.close(); + + // check that the index reader gives the same numbers. 
+ reader = DirectoryReader.open(dir); + assertEquals(50, reader.maxDoc()); + assertEquals(50, reader.numDocs()); + reader.close(); + dir.close(); + } } From 86f97b24bcd53c74fe6d1285dab4c6057b5d4d80 Mon Sep 17 00:00:00 2001 From: guojialiang Date: Thu, 27 Mar 2025 15:21:06 +0800 Subject: [PATCH 2/7] fix UT Signed-off-by: guojialiang --- .../src/test/org/apache/lucene/index/TestIndexWriter.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index acae3327b845..ddf9d2fdae54 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -5064,7 +5064,9 @@ public void testAdvanceSegmentInfosCounter() throws IOException { writer.commit(); } - assertEquals(1041, writer.getSegmentInfosCounter()); + // There may be merge operations in the background, here only verifies that the current segment + // counter is greater than 1000. 
+ assertTrue(writer.getSegmentInfosCounter() > 1000); IndexWriter.DocStats docStats = writer.getDocStats(); assertEquals(50, docStats.maxDoc); From 5f4077f94708159c88db309a0e697c315437eaed Mon Sep 17 00:00:00 2001 From: guojialiang Date: Thu, 27 Mar 2025 16:29:42 +0800 Subject: [PATCH 3/7] Some CR-related modifications Signed-off-by: guojialiang --- .../core/src/java/org/apache/lucene/index/IndexWriter.java | 3 ++- .../src/test/org/apache/lucene/index/TestIndexWriter.java | 7 ++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 80b44f256403..772b35fad94b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -1437,10 +1437,11 @@ public synchronized void advanceSegmentInfosCounter(long newCounter) { if (segmentInfos.counter < newCounter) { segmentInfos.counter = newCounter; } + changed(); } /** Returns the {@link SegmentInfos#counter}. 
*/ - public synchronized long getSegmentInfosCounter() { + public long getSegmentInfosCounter() { this.ensureOpen(); return segmentInfos.counter; } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index ddf9d2fdae54..2cc247d2c031 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -5051,13 +5051,10 @@ public void testAdvanceSegmentInfosCounter() throws IOException { addDocWithIndex(writer, i); writer.commit(); } - long beforeAdvanceSegmentCounter = writer.getSegmentInfosCounter(); writer.advanceSegmentInfosCounter(1); - assertEquals(beforeAdvanceSegmentCounter, writer.getSegmentInfosCounter()); + assertTrue(writer.getSegmentInfosCounter() >= 1); writer.advanceSegmentInfosCounter(1000); - assertEquals(1000, writer.getSegmentInfosCounter()); - // add 40 documents for (int i = 10; i < 50; i++) { addDocWithIndex(writer, i); @@ -5066,7 +5063,7 @@ public void testAdvanceSegmentInfosCounter() throws IOException { // There may be merge operations in the background, here only verifies that the current segment // counter is greater than 1000. 
- assertTrue(writer.getSegmentInfosCounter() > 1000); + assertTrue(writer.getSegmentInfosCounter() >= 1000); IndexWriter.DocStats docStats = writer.getDocStats(); assertEquals(50, docStats.maxDoc); From cba28e33a2fadf28fc0496e32a5336fa98a5babe Mon Sep 17 00:00:00 2001 From: guojialiang Date: Mon, 31 Mar 2025 17:11:47 +0800 Subject: [PATCH 4/7] add test Signed-off-by: guojialiang --- .../apache/lucene/index/TestIndexWriter.java | 47 ++++++++++++++++++- .../ThreadedIndexingAndSearchingTestCase.java | 20 +++++++- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 2cc247d2c031..113826c38c52 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -5040,10 +5040,8 @@ public void testDocValuesSkippingIndexWithoutDocValues() throws Exception { public void testAdvanceSegmentInfosCounter() throws IOException { Directory dir = newDirectory(); - IndexWriter writer; IndexReader reader; - writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); // add 10 documents @@ -5077,4 +5075,49 @@ public void testAdvanceSegmentInfosCounter() throws IOException { reader.close(); dir.close(); } + + public void testAdvanceSegmentCounterInCrashAndRecoveryScenario() throws IOException { + Directory dir = newDirectory(); + IndexWriter writer; + IndexReader reader; + writer = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + + // add 100 documents + for (int i = 0; i < 100; i++) { + addDocWithIndex(writer, i); + if (random().nextBoolean()) { + writer.commit(); + } + } + IndexWriter.DocStats docStats = writer.getDocStats(); + assertEquals(100, docStats.maxDoc); + assertEquals(100, docStats.numDocs); + writer.commit(); + writer.close(); + + // recovery and advance segment counter + writer = new 
IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); + assertEquals(100, writer.getDocStats().numDocs); + long newSegmentCounter = writer.getSegmentInfosCounter() + 1000; + writer.advanceSegmentInfosCounter(newSegmentCounter); + + // add 10 documents + for (int i = 0; i < 10; i++) { + addDocWithIndex(writer, i); + if (random().nextBoolean()) { + writer.commit(); + } + } + + assertTrue(writer.getSegmentInfosCounter() >= newSegmentCounter); + + assertEquals(110, writer.getDocStats().numDocs); + // check that the index reader gives the same numbers. + reader = DirectoryReader.open(dir); + assertEquals(110, reader.maxDoc()); + assertEquals(110, reader.numDocs()); + reader.close(); + writer.close(); + dir.close(); + } } diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java index 44c72caa3978..e6e3a7a7e01d 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java @@ -82,6 +82,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas protected final AtomicInteger addCount = new AtomicInteger(); protected final AtomicInteger delCount = new AtomicInteger(); protected final AtomicInteger packCount = new AtomicInteger(); + protected AtomicLong maxAdvancedSegmentCounter = new AtomicLong(0); protected Directory dir; protected IndexWriter writer; @@ -189,6 +190,19 @@ public void run() { addedField = null; } + // Maybe advance segment counter + if (random().nextBoolean()) { + long newSegmentCounter = writer.getSegmentInfosCounter() + 100; + writer.advanceSegmentInfosCounter(newSegmentCounter); + if (VERBOSE) { + System.out.println( + Thread.currentThread().getName() + + " advance segment counter to " + + 
newSegmentCounter); + } + maxAdvancedSegmentCounter.accumulateAndGet(newSegmentCounter, Math::max); + } + if (random().nextBoolean()) { if (random().nextBoolean()) { @@ -606,6 +620,8 @@ public void message(String component, String message) { thread.join(); } + assertTrue(writer.getSegmentInfosCounter() >= maxAdvancedSegmentCounter.get()); + if (VERBOSE) { System.out.println( "TEST: done join indexing threads [" @@ -613,7 +629,9 @@ public void message(String component, String message) { + " ms]; addCount=" + addCount + " delCount=" - + delCount); + + delCount + + " segmentCounter=" + + writer.getSegmentInfosCounter()); } final IndexSearcher s = getFinalSearcher(); From df7aaa79822cbb9b096a90a78415c52c200119ee Mon Sep 17 00:00:00 2001 From: guojialiang Date: Mon, 31 Mar 2025 19:13:52 +0800 Subject: [PATCH 5/7] add test Signed-off-by: guojialiang --- .../core/src/test/org/apache/lucene/index/TestIndexWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java index 113826c38c52..e64901080712 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -5113,6 +5113,7 @@ public void testAdvanceSegmentCounterInCrashAndRecoveryScenario() throws IOExcep assertEquals(110, writer.getDocStats().numDocs); // check that the index reader gives the same numbers. 
+ writer.commit(); reader = DirectoryReader.open(dir); assertEquals(110, reader.maxDoc()); assertEquals(110, reader.numDocs()); From e6a5340e3d857b1ef86fd2ebf896645790de73a1 Mon Sep 17 00:00:00 2001 From: guojialiang Date: Tue, 8 Apr 2025 16:32:51 +0800 Subject: [PATCH 6/7] Modify tests and CHANGES.txt Signed-off-by: guojialiang --- lucene/CHANGES.txt | 2 ++ .../tests/index/ThreadedIndexingAndSearchingTestCase.java | 5 +++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 867f53b1e06c..9bd95bc00327 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -22,6 +22,8 @@ API Changes * GITHUB#14291: Remove IOException from ScorerSupplier#setTopLevelScoringClause signature (Luca Cavanna) +* GITHUB#14417: Support modifying segmentInfos.counter in IndexWriter (Jialiang Guo) + New Features --------------------- * GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov) diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java index e6e3a7a7e01d..9f13fd2a8634 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/ThreadedIndexingAndSearchingTestCase.java @@ -190,8 +190,9 @@ public void run() { addedField = null; } - // Maybe advance segment counter - if (random().nextBoolean()) { + // Maybe advance segment counter. Run with a relatively low probability to avoid + // slowing the test down, since advanceSegmentInfosCounter is synchronized. 
+ if (random().nextInt(7) == 5) { long newSegmentCounter = writer.getSegmentInfosCounter() + 100; writer.advanceSegmentInfosCounter(newSegmentCounter); if (VERBOSE) { From 63a9c124273afdf9b2ddf154df7df9a4953be2e7 Mon Sep 17 00:00:00 2001 From: Vigya Sharma Date: Tue, 8 Apr 2025 22:39:53 -0700 Subject: [PATCH 7/7] Move changes entry to 10.3 --- lucene/CHANGES.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 9bd95bc00327..645506f4c642 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -22,8 +22,6 @@ API Changes * GITHUB#14291: Remove IOException from ScorerSupplier#setTopLevelScoringClause signature (Luca Cavanna) -* GITHUB#14417: Support modifying segmentInfos.counter in IndexWriter (Jialiang Guo) - New Features --------------------- * GITHUB#14097: Binary partitioning merge policy over float-valued vector field. (Mike Sokolov) @@ -61,6 +59,8 @@ API Changes * GITHUB#14401: Added LeafCollector#collectRange to enable collector to take advantage of pre-aggregated data to speed up faceting. (Adrien Grand) +* GITHUB#14417: Support modifying segmentInfos.counter in IndexWriter (Jialiang Guo) + New Features --------------------- (No changes)