diff --git a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java index 850cb2618093..2389e3b0485d 100644 --- a/lucene/core/src/java/org/apache/lucene/util/BitUtil.java +++ b/lucene/core/src/java/org/apache/lucene/util/BitUtil.java @@ -303,4 +303,12 @@ public static int zigZagDecode(int i) { public static long zigZagDecode(long l) { return ((l >>> 1) ^ -(l & 1)); } + + /** + * Return true if, and only if, the provided integer - treated as an unsigned integer - is either + * 0 or a power of two. + */ + public static boolean isZeroOrPowerOfTwo(int x) { + return (x & (x - 1)) == 0; + } } diff --git a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java index a0e7e60e99e6..bf5263ca2681 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java +++ b/lucene/core/src/java21/org/apache/lucene/store/MemorySegmentIndexInput.java @@ -26,6 +26,7 @@ import java.util.Objects; import java.util.Optional; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.GroupVIntUtil; /** @@ -57,6 +58,7 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces MemorySegment curSegment; // redundant for speed: segments[curSegmentIndex], also marker if closed! long curPosition; // relative to curSegment, not globally + int consecutivePrefetchHitCount; public static MemorySegmentIndexInput newInstance( String resourceDescription, @@ -318,6 +320,14 @@ public void prefetch(long offset, long length) throws IOException { Objects.checkFromIndexSize(offset, length, length()); + if (BitUtil.isZeroOrPowerOfTwo(consecutivePrefetchHitCount++) == false) { + // We've had enough consecutive hits on the page cache that this number is neither zero nor a + // power of two. There is a good chance that a good chunk of this index input is cached in + // physical memory. Let's skip the overhead of the madvise system call, we'll be trying again + // on the next power of two of the counter. + return; + } + if (NATIVE_ACCESS.isEmpty()) { return; } @@ -344,7 +354,11 @@ public void prefetch(long offset, long length) throws IOException { } final MemorySegment prefetchSlice = segment.asSlice(offset, length); - nativeAccess.madviseWillNeed(prefetchSlice); + if (nativeAccess.mincore(prefetchSlice) == false) { + // We have a cache miss on at least one page, let's reset the counter. + consecutivePrefetchHitCount = 0; + nativeAccess.madviseWillNeed(prefetchSlice); + } } catch ( @SuppressWarnings("unused") IndexOutOfBoundsException e) { diff --git a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java index affc0e2ac719..99275a81b878 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/NativeAccess.java @@ -32,6 +32,9 @@ abstract class NativeAccess { */ public abstract void madviseWillNeed(MemorySegment segment) throws IOException; + /** Returns {@code true} if pages from the given {@link MemorySegment} are resident in RAM. */ + public abstract boolean mincore(MemorySegment segment) throws IOException; + /** Returns native page size. */ public abstract int getPageSize(); diff --git a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java index 93caca788b1b..044387d4d8bd 100644 --- a/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java +++ b/lucene/core/src/java21/org/apache/lucene/store/PosixNativeAccess.java @@ -17,6 +17,7 @@ package org.apache.lucene.store; import java.io.IOException; +import java.lang.foreign.Arena; import java.lang.foreign.FunctionDescriptor; import java.lang.foreign.Linker; import java.lang.foreign.MemorySegment; @@ -50,6 +51,7 @@ final class PosixNativeAccess extends NativeAccess { public static final int POSIX_MADV_DONTNEED = 4; private static final MethodHandle MH$posix_madvise; + private static final MethodHandle MH$mincore; private static final int PAGE_SIZE; private static final Optional INSTANCE; @@ -64,10 +66,14 @@ static Optional getInstance() { final Linker linker = Linker.nativeLinker(); final SymbolLookup stdlib = linker.defaultLookup(); MethodHandle adviseHandle = null; + MethodHandle mincoreHandle = null; int pagesize = -1; PosixNativeAccess instance = null; try { adviseHandle = lookupMadvise(linker, stdlib); + // TODO: is mincore available on all systems where we need it? Do we need to handle the case + // when it's missing? + mincoreHandle = lookupMincore(linker, stdlib); pagesize = (int) lookupGetPageSize(linker, stdlib).invokeExact(); instance = new PosixNativeAccess(); } catch (UnsupportedOperationException uoe) { @@ -88,6 +94,7 @@ static Optional getInstance() { throw new AssertionError(e); } MH$posix_madvise = adviseHandle; + MH$mincore = mincoreHandle; PAGE_SIZE = pagesize; INSTANCE = Optional.ofNullable(instance); } @@ -104,6 +111,15 @@ private static MethodHandle lookupMadvise(Linker linker, SymbolLookup stdlib) { ValueLayout.JAVA_INT)); } + private static MethodHandle lookupMincore(Linker linker, SymbolLookup stdlib) { + return findFunction( + linker, + stdlib, + "mincore", + FunctionDescriptor.of( + ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS)); + } + private static MethodHandle lookupGetPageSize(Linker linker, SymbolLookup stdlib) { return findFunction(linker, stdlib, "getpagesize", FunctionDescriptor.of(ValueLayout.JAVA_INT)); } @@ -165,6 +181,44 @@ private Integer mapReadAdvice(ReadAdvice readAdvice) { }; } + @Override + public boolean mincore(MemorySegment segment) throws IOException { + final long numPages = (segment.byteSize() + getPageSize() - 1) / getPageSize(); + try (Arena arena = Arena.ofConfined()) { + MemorySegment vec = arena.allocate(numPages); + mincore(segment, vec); + for (long i = 0; i < numPages; ++i) { + byte b = vec.get(ValueLayout.JAVA_BYTE, i); + if (b == 0) { + return false; + } + } + return true; + } + } + + private static void mincore(MemorySegment segment, MemorySegment vec) throws IOException { + if (segment.byteSize() == 0L) { + return; // empty segments should be excluded, because they may have no address at all + } + final int ret; + try { + ret = (int) MH$mincore.invokeExact(segment, segment.byteSize(), vec); + } catch (Throwable th) { + throw new AssertionError(th); + } + if (ret != 0) { + throw new IOException( + String.format( + Locale.ENGLISH, + "Call to mincore with address=0x%08X, byteSize=%d and vec.byteSize=%d failed with return code %d.", + segment.address(), + segment.byteSize(), + vec.byteSize(), + ret)); + } + } + @Override public int getPageSize() { return PAGE_SIZE; diff --git a/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java new file mode 100644 index 000000000000..c6c6823fcf8f --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + +import org.apache.lucene.tests.util.LuceneTestCase; + +public class TestBitUtil extends LuceneTestCase { + + public void testIsZeroOrPowerOfTwo() { + assertTrue(BitUtil.isZeroOrPowerOfTwo(0)); + for (int shift = 0; shift <= 31; ++shift) { + assertTrue(BitUtil.isZeroOrPowerOfTwo(1 << shift)); + } + assertFalse(BitUtil.isZeroOrPowerOfTwo(3)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(5)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(6)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(7)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(9)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE + 2)); + assertFalse(BitUtil.isZeroOrPowerOfTwo(-1)); + } +}