From a6b852cf3a3ac9dd5787265b8caae4442228d489 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Fri, 17 May 2024 21:50:53 +0200 Subject: [PATCH 1/3] Reduce the overhead of `IndexInput#prefetch` when data is cached in RAM. As Robert pointed out and benchmarks confirmed, there is some (small) overhead to calling `madvise` via the foreign function API, benchmarks suggest it is in the order of 1-2us. This is not much for a single call, but may become non-negligible across many calls. Until now, we only looked into using prefetch() for terms, skip data and postings start pointers which are a single prefetch() operation per segment per term. But we may want to start using it in cases that could result into more calls to `madvise`, e.g. if we start using it for stored fields and a user requests 10k documents. In #13337, Robert wondered if we could take advantage of `mincore()` to reduce the overhead of `IndexInput#prefetch()`, which is what this PR is doing. For now, this is trying to not add new APIs. Instead, `IndexInput#prefetch` tracks consecutive hits on the page cache and calls `madvise` less and less frequently under the hood as the number of cache hits increases. --- .../java/org/apache/lucene/util/BitUtil.java | 8 +++ .../lucene/store/MemorySegmentIndexInput.java | 16 +++++- .../org/apache/lucene/store/NativeAccess.java | 3 ++ .../lucene/store/PosixNativeAccess.java | 54 +++++++++++++++++++ .../org/apache/lucene/util/TestBitUtil.java | 37 +++++++++++++ 5 files changed, 117 insertions(+), 1 deletion(-) create mode 100644 lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java diff --git a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java index 850cb2618093..2389e3b0485d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java @@ -303,4 +303,12 @@ public static int zigZagDecode(int i) { public static long zigZagDecode(long l) { return ((l >>> 1) ^ -(l & 1)); } + + /** + * Return true if, and only if, the provided integer - treated as an unsigned integer - is either + * 0 or a power of two. + */ + public static boolean isZeroOrPowerOfTwo(int x) { + return (x & (x - 1)) == 0; + } } diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index a0e7e60e99e6..bf5263ca2681 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -26,6 +26,7 @@ import java.util.Objects; import java.util.Optional; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.GroupVIntUtil; /** @@ -57,6 +58,7 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces MemorySegment curSegment; // redundant for speed: segments[curSegmentIndex], also marker if closed! long curPosition; // relative to curSegment, not globally + int consecutivePrefetchHitCount; public static MemorySegmentIndexInput newInstance( String resourceDescription, @@ -318,6 +320,14 @@ public void prefetch(long offset, long length) throws IOException { Objects.checkFromIndexSize(offset, length, length()); + if (BitUtil.isZeroOrPowerOfTwo(consecutivePrefetchHitCount++) == false) { + // We've had enough consecutive hits on the page cache that this number is neither zero nor a + // power of two. There is a good chance that a good chunk of this index input is cached in + // physical memory. Let's skip the overhead of the madvise system call, we'll be trying again + // on the next power of two of the counter. + return; + } + if (NATIVE_ACCESS.isEmpty()) { return; } @@ -344,7 +354,11 @@ public void prefetch(long offset, long length) throws IOException { } final MemorySegment prefetchSlice = segment.asSlice(offset, length); - nativeAccess.madviseWillNeed(prefetchSlice); + if (nativeAccess.mincore(prefetchSlice) == false) { + // We have a cache miss on at least one page, let's reset the counter. + consecutivePrefetchHitCount = 0; + nativeAccess.madviseWillNeed(prefetchSlice); + } } catch ( @SuppressWarnings("unused") IndexOutOfBoundsException e) { diff --git a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java index affc0e2ac719..99275a81b878 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java @@ -32,6 +32,9 @@ abstract class NativeAccess { */ public abstract void madviseWillNeed(MemorySegment segment) throws IOException; + /** Returns {@code true} if pages from the given {@link MemorySegment} are resident in RAM. */ + public abstract boolean mincore(MemorySegment segment) throws IOException; + /** Returns native page size. */ public abstract int getPageSize(); diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java index 93caca788b1b..044387d4d8bd 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java @@ -17,6 +17,7 @@ package org.apache.lucene.store; import java.io.IOException; +import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; import java.lang.foreign.MemorySegment; @@ -50,6 +51,7 @@ final class PosixNativeAccess extends NativeAccess { public static final int POSIX_MADV_DONTNEED = 4; private static final MethodHandle MH$posix_madvise; + private static final MethodHandle MH$mincore; private static final int PAGE_SIZE; private static final Optional INSTANCE; @@ -64,10 +66,14 @@ static Optional getInstance() { final Linker linker = Linker.nativeLinker(); final SymbolLookup stdlib = linker.defaultLookup(); MethodHandle adviseHandle = null; + MethodHandle mincoreHandle = null; int pagesize = -1; PosixNativeAccess instance = null; try { adviseHandle = lookupMadvise(linker, stdlib); + // TODO: is mincore available on all systems where we need it? Do we need to handle the case + // when it's missing? + mincoreHandle = lookupMincore(linker, stdlib); pagesize = (int) lookupGetPageSize(linker, stdlib).invokeExact(); instance = new PosixNativeAccess(); } catch (UnsupportedOperationException uoe) { @@ -88,6 +94,7 @@ static Optional getInstance() { throw new AssertionError(e); } MH$posix_madvise = adviseHandle; + MH$mincore = mincoreHandle; PAGE_SIZE = pagesize; INSTANCE = Optional.ofNullable(instance); } @@ -104,6 +111,15 @@ private static MethodHandle lookupMadvise(Linker linker, SymbolLookup stdlib) { ValueLayout.JAVA_INT)); } + private static MethodHandle lookupMincore(Linker linker, SymbolLookup stdlib) { + return findFunction( + linker, + stdlib, + "mincore", + FunctionDescriptor.of( + ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS)); + } + private static MethodHandle lookupGetPageSize(Linker linker, SymbolLookup stdlib) { return findFunction(linker, stdlib, "getpagesize", FunctionDescriptor.of(ValueLayout.JAVA_INT)); } @@ -165,6 +181,44 @@ private Integer mapReadAdvice(ReadAdvice readAdvice) { }; } + @Override + public boolean mincore(MemorySegment segment) throws IOException { + final long numPages = (segment.byteSize() + getPageSize() - 1) / getPageSize(); + try (Arena arena = Arena.ofConfined()) { + MemorySegment vec = arena.allocate(numPages); + mincore(segment, vec); + for (long i = 0; i < numPages; ++i) { + byte b = vec.get(ValueLayout.JAVA_BYTE, i); + if (b == 0) { + return false; + } + } + return true; + } + } + + private static void mincore(MemorySegment segment, MemorySegment vec) throws IOException { + if (segment.byteSize() == 0L) { + return; // empty segments should be excluded, because they may have no address at all + } + final int ret; + try { + ret = (int) MH$mincore.invokeExact(segment, segment.byteSize(), vec); + } catch (Throwable th) { + throw new AssertionError(th); + } + if (ret != 0) { + throw new IOException( + String.format( + Locale.ENGLISH, + "Call to mincore with address=0x%08X, byteSize=%d and vec.byteSize=%d failed with return code %d.", + segment.address(), + segment.byteSize(), + vec.byteSize(), + ret)); + } + } + @Override public int getPageSize() { return PAGE_SIZE; diff --git a/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java new file mode 100644 index 000000000000..c6c6823fcf8f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import org.apache.lucene.tests.util.LuceneTestCase; + +public class TestBitUtil extends LuceneTestCase { + + public void testIsZeroOrPowerOfTwo() { + assertTrue(BitUtil.isZeroOrPowerOfTwo(0)); + for (int shift = 0; shift <= 31; ++shift) { + assertTrue(BitUtil.isZeroOrPowerOfTwo(1 << shift)); + } + assertFalse(BitUtil.isZeroOrPowerOfTwo(3)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(5)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(6)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(7)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(9)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE + 2)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(-1)); + } +} From b95907d1dd6fc3a5036e34e6497df329e47825e6 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 19 May 2024 22:36:07 +0200 Subject: [PATCH 2/3] Switch to `#isLoaded`. --- .../lucene/store/MemorySegmentIndexInput.java | 2 +- .../org/apache/lucene/store/NativeAccess.java | 3 -- .../lucene/store/PosixNativeAccess.java | 54 ------------------- 3 files changed, 1 insertion(+), 58 deletions(-) diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index bf5263ca2681..ea76bb6a13fc 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -354,7 +354,7 @@ public void prefetch(long offset, long length) throws IOException { } final MemorySegment prefetchSlice = segment.asSlice(offset, length); - if (nativeAccess.mincore(prefetchSlice) == false) { + if (prefetchSlice.isLoaded() == false) { // We have a cache miss on at least one page, let's reset the counter. consecutivePrefetchHitCount = 0; nativeAccess.madviseWillNeed(prefetchSlice); diff --git a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java index 99275a81b878..affc0e2ac719 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java @@ -32,9 +32,6 @@ abstract class NativeAccess { */ public abstract void madviseWillNeed(MemorySegment segment) throws IOException; - /** Returns {@code true} if pages from the given {@link MemorySegment} are resident in RAM. */ - public abstract boolean mincore(MemorySegment segment) throws IOException; - /** Returns native page size. */ public abstract int getPageSize(); diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java index 044387d4d8bd..93caca788b1b 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java @@ -17,7 +17,6 @@ package org.apache.lucene.store; import java.io.IOException; -import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; import java.lang.foreign.MemorySegment; @@ -51,7 +50,6 @@ final class PosixNativeAccess extends NativeAccess { public static final int POSIX_MADV_DONTNEED = 4; private static final MethodHandle MH$posix_madvise; - private static final MethodHandle MH$mincore; private static final int PAGE_SIZE; private static final Optional INSTANCE; @@ -66,14 +64,10 @@ static Optional getInstance() { final Linker linker = Linker.nativeLinker(); final SymbolLookup stdlib = linker.defaultLookup(); MethodHandle adviseHandle = null; - MethodHandle mincoreHandle = null; int pagesize = -1; PosixNativeAccess instance = null; try { adviseHandle = lookupMadvise(linker, stdlib); - // TODO: is mincore available on all systems where we need it? Do we need to handle the case - // when it's missing? - mincoreHandle = lookupMincore(linker, stdlib); pagesize = (int) lookupGetPageSize(linker, stdlib).invokeExact(); instance = new PosixNativeAccess(); } catch (UnsupportedOperationException uoe) { @@ -94,7 +88,6 @@ static Optional getInstance() { throw new AssertionError(e); } MH$posix_madvise = adviseHandle; - MH$mincore = mincoreHandle; PAGE_SIZE = pagesize; INSTANCE = Optional.ofNullable(instance); } @@ -111,15 +104,6 @@ private static MethodHandle lookupMadvise(Linker linker, SymbolLookup stdlib) { ValueLayout.JAVA_INT)); } - private static MethodHandle lookupMincore(Linker linker, SymbolLookup stdlib) { - return findFunction( - linker, - stdlib, - "mincore", - FunctionDescriptor.of( - ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS)); - } - private static MethodHandle lookupGetPageSize(Linker linker, SymbolLookup stdlib) { return findFunction(linker, stdlib, "getpagesize", FunctionDescriptor.of(ValueLayout.JAVA_INT)); } @@ -181,44 +165,6 @@ private Integer mapReadAdvice(ReadAdvice readAdvice) { }; } - @Override - public boolean mincore(MemorySegment segment) throws IOException { - final long numPages = (segment.byteSize() + getPageSize() - 1) / getPageSize(); - try (Arena arena = Arena.ofConfined()) { - MemorySegment vec = arena.allocate(numPages); - mincore(segment, vec); - for (long i = 0; i < numPages; ++i) { - byte b = vec.get(ValueLayout.JAVA_BYTE, i); - if (b == 0) { - return false; - } - } - return true; - } - } - - private static void mincore(MemorySegment segment, MemorySegment vec) throws IOException { - if (segment.byteSize() == 0L) { - return; // empty segments should be excluded, because they may have no address at all - } - final int ret; - try { - ret = (int) MH$mincore.invokeExact(segment, segment.byteSize(), vec); - } catch (Throwable th) { - throw new AssertionError(th); - } - if (ret != 0) { - throw new IOException( - String.format( - Locale.ENGLISH, - "Call to mincore with address=0x%08X, byteSize=%d and vec.byteSize=%d failed with return code %d.", - segment.address(), - segment.byteSize(), - vec.byteSize(), - ret)); - } - } - @Override public int getPageSize() { return PAGE_SIZE; From 925912ee52e3f8006edb49d699ca1446aa4f1f31 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 19 May 2024 22:37:33 +0200 Subject: [PATCH 3/3] Move NATIVE_ACCESS check to the top. --- .../org/apache/lucene/store/MemorySegmentIndexInput.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index ea76bb6a13fc..e9f650e6be69 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -316,6 +316,10 @@ public void seek(long pos) throws IOException { @Override public void prefetch(long offset, long length) throws IOException { + if (NATIVE_ACCESS.isEmpty()) { + return; + } + ensureOpen(); Objects.checkFromIndexSize(offset, length, length()); @@ -328,9 +332,6 @@ public void prefetch(long offset, long length) throws IOException { return; } - if (NATIVE_ACCESS.isEmpty()) { - return; - } final NativeAccess nativeAccess = NATIVE_ACCESS.get(); try {