Skip to content

Support modifying segmentInfos.counter in IndexWriter #14417

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ API Changes
* GITHUB#14401: Added LeafCollector#collectRange to enable collector to take
advantage of pre-aggregated data to speed up faceting. (Adrien Grand)

* GITHUB#14417: Added IndexWriter#advanceSegmentInfosCounter and IndexWriter#getSegmentInfosCounter
  to support advancing segmentInfos.counter in IndexWriter. (Jialiang Guo)

New Features
---------------------
(No changes)
Expand Down
19 changes: 19 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,25 @@ public synchronized void advanceSegmentInfosVersion(long newVersion) {
changed();
}

/**
 * Advances {@link SegmentInfos#counter} to {@code newCounter} when the current value is smaller.
 * A counter that is already at or above {@code newCounter} keeps its value, so the counter never
 * moves backwards through this method.
 *
 * @param newCounter the minimum value the counter should have after this call
 * @lucene.internal
 */
public synchronized void advanceSegmentInfosCounter(long newCounter) {
  ensureOpen();
  // Taking the larger of the two values is the same as "raise only if below".
  segmentInfos.counter = Math.max(segmentInfos.counter, newCounter);
  // changed() is invoked whether or not the counter actually moved.
  changed();
}

/**
 * Returns the current {@link SegmentInfos#counter}.
 *
 * <p>This method is synchronized because {@link #advanceSegmentInfosCounter(long)} updates the
 * counter while holding this writer's monitor. Reading under the same monitor guarantees that a
 * caller on another thread observes the latest advanced value and never sees a torn 64-bit read
 * of the {@code long} field.
 *
 * @return the current value of the segment counter
 * @lucene.internal
 */
public synchronized long getSegmentInfosCounter() {
  ensureOpen();
  return segmentInfos.counter;
}

/**
* Returns true if this index has deletions (including buffered deletions). Note that this will
* return true if there are buffered Term/Query deletions, even if it turns out those buffered
Expand Down
84 changes: 84 additions & 0 deletions lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -5037,4 +5037,88 @@ public void testDocValuesSkippingIndexWithoutDocValues() throws Exception {
}
}
}

/**
 * Verifies that advancing the segment counter on a live writer is reflected by {@link
 * IndexWriter#getSegmentInfosCounter()} and does not disturb indexing: all 50 documents added
 * around the advance calls are visible both to the writer and to a reader opened afterwards.
 *
 * <p>The directory, writer and reader are managed with try-with-resources so that a failing
 * assertion does not leave any of them open.
 */
public void testAdvanceSegmentInfosCounter() throws IOException {
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer =
        new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {

      // add 10 documents, committing after each one
      for (int i = 0; i < 10; i++) {
        addDocWithIndex(writer, i);
        writer.commit();
      }
      writer.advanceSegmentInfosCounter(1);
      assertTrue(writer.getSegmentInfosCounter() >= 1);

      writer.advanceSegmentInfosCounter(1000);
      // add 40 documents, committing after each one
      for (int i = 10; i < 50; i++) {
        addDocWithIndex(writer, i);
        writer.commit();
      }

      // There may be merge operations in the background, so only verify that the current
      // segment counter is at least 1000 rather than checking for an exact value.
      assertTrue(writer.getSegmentInfosCounter() >= 1000);

      IndexWriter.DocStats docStats = writer.getDocStats();
      assertEquals(50, docStats.maxDoc);
      assertEquals(50, docStats.numDocs);
    }

    // check that the index reader gives the same numbers once the writer is closed.
    try (IndexReader reader = DirectoryReader.open(dir)) {
      assertEquals(50, reader.maxDoc());
      assertEquals(50, reader.numDocs());
    }
  }
}

/**
 * Verifies that the segment counter can be advanced on a writer that was reopened over an
 * existing index: the 100 documents written by the first writer are still present, and 10 more
 * documents can be added after the counter has been advanced by 1000.
 *
 * <p>NOTE(review): despite the name, the first writer is closed normally here rather than
 * crashed; the "recovery" step is a plain reopen of the same directory.
 *
 * <p>The directory, writers and reader are managed with try-with-resources so that a failing
 * assertion does not leave any of them open.
 */
public void testAdvanceSegmentCounterInCrashAndRecoveryScenario() throws IOException {
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer =
        new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {

      // add 100 documents, committing at random points
      for (int i = 0; i < 100; i++) {
        addDocWithIndex(writer, i);
        if (random().nextBoolean()) {
          writer.commit();
        }
      }
      IndexWriter.DocStats docStats = writer.getDocStats();
      assertEquals(100, docStats.maxDoc);
      assertEquals(100, docStats.numDocs);
      writer.commit();
    }

    // recovery and advance segment counter
    try (IndexWriter writer =
        new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random())))) {
      assertEquals(100, writer.getDocStats().numDocs);
      long newSegmentCounter = writer.getSegmentInfosCounter() + 1000;
      writer.advanceSegmentInfosCounter(newSegmentCounter);

      // add 10 documents, committing at random points
      for (int i = 0; i < 10; i++) {
        addDocWithIndex(writer, i);
        if (random().nextBoolean()) {
          writer.commit();
        }
      }

      assertTrue(writer.getSegmentInfosCounter() >= newSegmentCounter);

      assertEquals(110, writer.getDocStats().numDocs);
      // check that the index reader gives the same numbers.
      writer.commit();
      // The reader is opened while the writer is still open and is closed before it.
      try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(110, reader.maxDoc());
        assertEquals(110, reader.numDocs());
      }
    }
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ public abstract class ThreadedIndexingAndSearchingTestCase extends LuceneTestCas
protected final AtomicInteger addCount = new AtomicInteger();
protected final AtomicInteger delCount = new AtomicInteger();
protected final AtomicInteger packCount = new AtomicInteger();
protected AtomicLong maxAdvancedSegmentCounter = new AtomicLong(0);

protected Directory dir;
protected IndexWriter writer;
Expand Down Expand Up @@ -189,6 +190,20 @@ public void run() {
addedField = null;
}

// Maybe advance the segment counter. Do this with a relatively low probability so that
// the synchronized call does not slow the test down.
if (random().nextInt(7) == 5) {
long newSegmentCounter = writer.getSegmentInfosCounter() + 100;
writer.advanceSegmentInfosCounter(newSegmentCounter);
if (VERBOSE) {
System.out.println(
Thread.currentThread().getName()
+ " advance segment counter to "
+ newSegmentCounter);
}
maxAdvancedSegmentCounter.accumulateAndGet(newSegmentCounter, Math::max);
}

if (random().nextBoolean()) {

if (random().nextBoolean()) {
Expand Down Expand Up @@ -606,14 +621,18 @@ public void message(String component, String message) {
thread.join();
}

assertTrue(writer.getSegmentInfosCounter() >= maxAdvancedSegmentCounter.get());

if (VERBOSE) {
System.out.println(
"TEST: done join indexing threads ["
+ TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - t0)
+ " ms]; addCount="
+ addCount
+ " delCount="
+ delCount);
+ delCount
+ " segmentCounter="
+ writer.getSegmentInfosCounter());
}

final IndexSearcher s = getFinalSearcher();
Expand Down