Skip to content

Commit

Permalink
Reduce the overhead of IndexInput#prefetch when data is cached in RAM.
Browse files Browse the repository at this point in the history
As Robert pointed out and benchmarks confirmed, there is some (small) overhead
to calling `madvise` via the foreign function API, benchmarks suggest it is in
the order of 1-2us. This is not much for a single call, but may become
non-negligible across many calls. Until now, we only looked into using
prefetch() for terms, skip data and postings start pointers which are a single
prefetch() operation per segment per term.

But we may want to start using it in cases that could result into more calls to
`madvise`, e.g. if we start using it for stored fields and a user requests 10k
documents. In apache#13337, Robert wondered if we could take advantage of `mincore()`
to reduce the overhead of `IndexInput#prefetch()`, which is what this PR is
doing.

For now, this is trying to not add new APIs. Instead, `IndexInput#prefetch`
tracks consecutive hits on the page cache and calls `madvise` less and less
frequently under the hood as the number of cache hits increases.
  • Loading branch information
jpountz committed May 17, 2024
1 parent 2c81649 commit a6b852c
Show file tree
Hide file tree
Showing 5 changed files with 117 additions and 1 deletion.
8 changes: 8 additions & 0 deletions lucene/core/src/java/org/apache/lucene/util/BitUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -303,4 +303,12 @@ public static int zigZagDecode(int i) {
public static long zigZagDecode(long l) {
return ((l >>> 1) ^ -(l & 1));
}

/**
* Return true if, and only if, the provided integer - treated as an unsigned integer - is either
* 0 or a power of two.
*/
public static boolean isZeroOrPowerOfTwo(int x) {
return (x & (x - 1)) == 0;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import java.util.Objects;
import java.util.Optional;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BitUtil;
import org.apache.lucene.util.GroupVIntUtil;

/**
Expand Down Expand Up @@ -57,6 +58,7 @@ abstract class MemorySegmentIndexInput extends IndexInput implements RandomAcces
MemorySegment
curSegment; // redundant for speed: segments[curSegmentIndex], also marker if closed!
long curPosition; // relative to curSegment, not globally
int consecutivePrefetchHitCount;

public static MemorySegmentIndexInput newInstance(
String resourceDescription,
Expand Down Expand Up @@ -318,6 +320,14 @@ public void prefetch(long offset, long length) throws IOException {

Objects.checkFromIndexSize(offset, length, length());

if (BitUtil.isZeroOrPowerOfTwo(consecutivePrefetchHitCount++) == false) {
// We've had enough consecutive hits on the page cache that this number is neither zero nor a
// power of two. There is a good chance that a good chunk of this index input is cached in
// physical memory. Let's skip the overhead of the madvise system call, we'll be trying again
// on the next power of two of the counter.
return;
}

if (NATIVE_ACCESS.isEmpty()) {
return;
}
Expand All @@ -344,7 +354,11 @@ public void prefetch(long offset, long length) throws IOException {
}

final MemorySegment prefetchSlice = segment.asSlice(offset, length);
nativeAccess.madviseWillNeed(prefetchSlice);
if (nativeAccess.mincore(prefetchSlice) == false) {
// We have a cache miss on at least one page, let's reset the counter.
consecutivePrefetchHitCount = 0;
nativeAccess.madviseWillNeed(prefetchSlice);
}
} catch (
@SuppressWarnings("unused")
IndexOutOfBoundsException e) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ abstract class NativeAccess {
*/
public abstract void madviseWillNeed(MemorySegment segment) throws IOException;

/** Returns {@code true} if pages from the given {@link MemorySegment} are resident in RAM. */
public abstract boolean mincore(MemorySegment segment) throws IOException;

/** Returns native page size. */
public abstract int getPageSize();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.store;

import java.io.IOException;
import java.lang.foreign.Arena;
import java.lang.foreign.FunctionDescriptor;
import java.lang.foreign.Linker;
import java.lang.foreign.MemorySegment;
Expand Down Expand Up @@ -50,6 +51,7 @@ final class PosixNativeAccess extends NativeAccess {
public static final int POSIX_MADV_DONTNEED = 4;

private static final MethodHandle MH$posix_madvise;
private static final MethodHandle MH$mincore;
private static final int PAGE_SIZE;

private static final Optional<NativeAccess> INSTANCE;
Expand All @@ -64,10 +66,14 @@ static Optional<NativeAccess> getInstance() {
final Linker linker = Linker.nativeLinker();
final SymbolLookup stdlib = linker.defaultLookup();
MethodHandle adviseHandle = null;
MethodHandle mincoreHandle = null;
int pagesize = -1;
PosixNativeAccess instance = null;
try {
adviseHandle = lookupMadvise(linker, stdlib);
// TODO: is mincore available on all systems where we need it? Do we need to handle the case
// when it's missing?
mincoreHandle = lookupMincore(linker, stdlib);
pagesize = (int) lookupGetPageSize(linker, stdlib).invokeExact();
instance = new PosixNativeAccess();
} catch (UnsupportedOperationException uoe) {
Expand All @@ -88,6 +94,7 @@ static Optional<NativeAccess> getInstance() {
throw new AssertionError(e);
}
MH$posix_madvise = adviseHandle;
MH$mincore = mincoreHandle;
PAGE_SIZE = pagesize;
INSTANCE = Optional.ofNullable(instance);
}
Expand All @@ -104,6 +111,15 @@ private static MethodHandle lookupMadvise(Linker linker, SymbolLookup stdlib) {
ValueLayout.JAVA_INT));
}

private static MethodHandle lookupMincore(Linker linker, SymbolLookup stdlib) {
return findFunction(
linker,
stdlib,
"mincore",
FunctionDescriptor.of(
ValueLayout.JAVA_INT, ValueLayout.ADDRESS, ValueLayout.JAVA_LONG, ValueLayout.ADDRESS));
}

private static MethodHandle lookupGetPageSize(Linker linker, SymbolLookup stdlib) {
return findFunction(linker, stdlib, "getpagesize", FunctionDescriptor.of(ValueLayout.JAVA_INT));
}
Expand Down Expand Up @@ -165,6 +181,44 @@ private Integer mapReadAdvice(ReadAdvice readAdvice) {
};
}

@Override
public boolean mincore(MemorySegment segment) throws IOException {
final long numPages = (segment.byteSize() + getPageSize() - 1) / getPageSize();
try (Arena arena = Arena.ofConfined()) {
MemorySegment vec = arena.allocate(numPages);
mincore(segment, vec);
for (long i = 0; i < numPages; ++i) {
byte b = vec.get(ValueLayout.JAVA_BYTE, i);
if (b == 0) {
return false;
}
}
return true;
}
}

private static void mincore(MemorySegment segment, MemorySegment vec) throws IOException {
if (segment.byteSize() == 0L) {
return; // empty segments should be excluded, because they may have no address at all
}
final int ret;
try {
ret = (int) MH$mincore.invokeExact(segment, segment.byteSize(), vec);
} catch (Throwable th) {
throw new AssertionError(th);
}
if (ret != 0) {
throw new IOException(
String.format(
Locale.ENGLISH,
"Call to mincore with address=0x%08X, byteSize=%d and vec.byteSize=%d failed with return code %d.",
segment.address(),
segment.byteSize(),
vec.byteSize(),
ret));
}
}

@Override
public int getPageSize() {
return PAGE_SIZE;
Expand Down
37 changes: 37 additions & 0 deletions lucene/core/src/test/org/apache/lucene/util/TestBitUtil.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.util;

import org.apache.lucene.tests.util.LuceneTestCase;

public class TestBitUtil extends LuceneTestCase {

public void testIsZeroOrPowerOfTwo() {
assertTrue(BitUtil.isZeroOrPowerOfTwo(0));
for (int shift = 0; shift <= 31; ++shift) {
assertTrue(BitUtil.isZeroOrPowerOfTwo(1 << shift));
}
assertFalse(BitUtil.isZeroOrPowerOfTwo(3));
assertFalse(BitUtil.isZeroOrPowerOfTwo(5));
assertFalse(BitUtil.isZeroOrPowerOfTwo(6));
assertFalse(BitUtil.isZeroOrPowerOfTwo(7));
assertFalse(BitUtil.isZeroOrPowerOfTwo(9));
assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE));
assertFalse(BitUtil.isZeroOrPowerOfTwo(Integer.MAX_VALUE + 2));
assertFalse(BitUtil.isZeroOrPowerOfTwo(-1));
}
}

0 comments on commit a6b852c

Please sign in to comment.