Use IndexInput#prefetch for postings, skip data and impacts (apache#1…

…3364) This uses the `IndexInput#prefetch` API for postings. This relies on heuristics, as we don't know ahead of time what data we will need from a postings list: - Postings lists are prefetched entirely when they are short (< 16kB). - Impacts enums also prefetch the first page of skip data. - Postings enums prefetc skip data on the first call to advance(). Positions, offsets and payloads are never prefetched. Putting the `IndexInput#prefetch` call in `TermsEnum#postings` and `TermsEnum#impacts` works well because `BooleanQuery` will first create postings/impacts enums for all clauses before it starts unioning/intersecting them. This allows the prefetching logic to run in parallel across all clauses of the same query on the same segment.
jpountz · May 17, 2024 · c5331df · c5331df
1 parent 3d671a0
commit c5331df
Showing 1 changed file with 101 additions and 40 deletions.
diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene99/Lucene99PostingsReader.java
@@ -53,6 +53,9 @@
  */
 public final class Lucene99PostingsReader extends PostingsReaderBase {
 
+  /** Maximum byte size of a postings list to be fully prefetched. */
+  private static final int MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH = 16_384;
+
   private final IndexInput docIn;
   private final IndexInput posIn;
   private final IndexInput payIn;
@@ -321,6 +324,7 @@ final class BlockDocsEnum extends PostingsEnum {
 
     private Lucene99SkipReader skipper;
     private boolean skipped;
+    private boolean prefetchedSkipData;
 
     final IndexInput startDocIn;
 
@@ -393,7 +397,7 @@ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOExcep
           // lazy init
           docIn = startDocIn.clone();
         }
-        docIn.seek(docTermStartFP);
+        seekAndPrefetchPostings(docIn, termState);
       }
 
       doc = -1;
@@ -409,6 +413,7 @@ public PostingsEnum reset(IntBlockTermState termState, int flags) throws IOExcep
       nextSkipDoc = BLOCK_SIZE - 1; // we won't skip if target is found in first block
       docBufferUpto = BLOCK_SIZE;
       skipped = false;
+      prefetchedSkipData = false;
       return this;
     }
 
@@ -501,44 +506,52 @@ public int nextDoc() throws IOException {
     public int advance(int target) throws IOException {
       // current skip docID < docIDs generated from current buffer <= next skip docID
       // we don't need to skip if target is buffered already
-      if (docFreq > BLOCK_SIZE && target > nextSkipDoc) {
-
-        if (skipper == null) {
-          // Lazy init: first time this enum has ever been used for skipping
-          skipper =
-              new Lucene99SkipReader(
-                  docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
-        }
-
-        if (!skipped) {
-          assert skipOffset != -1;
-          // This is the first time this enum has skipped
-          // since reset() was called; load the skip data:
-          skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
-          skipped = true;
-        }
-
-        // always plus one to fix the result, since skip position in Lucene99SkipReader
-        // is a little different from MultiLevelSkipListReader
-        final int newDocUpto = skipper.skipTo(target) + 1;
+      if (docFreq > BLOCK_SIZE) {
+        if (target <= nextSkipDoc) {
+          // We don't need skip data yet, but we have evidence that advance() is called, so let's
+          // prefetch skip data in the background.
+          if (prefetchedSkipData == false) {
+            prefetchSkipData(docIn, docTermStartFP, skipOffset);
+            prefetchedSkipData = true;
+          }
+        } else {
+          if (skipper == null) {
+            // Lazy init: first time this enum has ever been used for skipping
+            skipper =
+                new Lucene99SkipReader(
+                    docIn.clone(), MAX_SKIP_LEVELS, indexHasPos, indexHasOffsets, indexHasPayloads);
+          }
 
-        if (newDocUpto >= blockUpto) {
-          // Skipper moved
-          assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
-          blockUpto = newDocUpto;
+          if (!skipped) {
+            assert skipOffset != -1;
+            // This is the first time this enum has skipped
+            // since reset() was called; load the skip data:
+            skipper.init(docTermStartFP + skipOffset, docTermStartFP, 0, 0, docFreq);
+            skipped = true;
+          }
 
-          // Force to read next block
-          docBufferUpto = BLOCK_SIZE;
-          accum = skipper.getDoc(); // actually, this is just lastSkipEntry
-          docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
-          // even if freqBuffer were not read from the previous block, we will mark them as read,
-          // as we don't need to skip the previous block freqBuffer in refillDocs,
-          // as we have already positioned docIn where in needs to be.
-          isFreqsRead = true;
+          // always plus one to fix the result, since skip position in Lucene99SkipReader
+          // is a little different from MultiLevelSkipListReader
+          final int newDocUpto = skipper.skipTo(target) + 1;
+
+          if (newDocUpto >= blockUpto) {
+            // Skipper moved
+            assert newDocUpto % BLOCK_SIZE == 0 : "got " + newDocUpto;
+            blockUpto = newDocUpto;
+
+            // Force to read next block
+            docBufferUpto = BLOCK_SIZE;
+            accum = skipper.getDoc(); // actually, this is just lastSkipEntry
+            docIn.seek(skipper.getDocPointer()); // now point to the block we want to search
+            // even if freqBuffer were not read from the previous block, we will mark them as read,
+            // as we don't need to skip the previous block freqBuffer in refillDocs,
+            // as we have already positioned docIn where in needs to be.
+            isFreqsRead = true;
+          }
+          // next time we call advance, this is used to
+          // foresee whether skipper is necessary.
+          nextSkipDoc = skipper.getNextSkipDoc();
         }
-        // next time we call advance, this is used to
-        // foresee whether skipper is necessary.
-        nextSkipDoc = skipper.getNextSkipDoc();
       }
       if (docBufferUpto == BLOCK_SIZE) {
         refillDocs();
@@ -594,6 +607,7 @@ final class EverythingEnum extends PostingsEnum {
 
     private Lucene99SkipReader skipper;
     private boolean skipped;
+    private boolean prefetchedSkipData;
 
     final IndexInput startDocIn;
 
@@ -715,7 +729,7 @@ public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOExc
           // lazy init
           docIn = startDocIn.clone();
         }
-        docIn.seek(docTermStartFP);
+        seekAndPrefetchPostings(docIn, termState);
       }
       posPendingFP = posTermStartFP;
       payPendingFP = payTermStartFP;
@@ -741,6 +755,7 @@ public EverythingEnum reset(IntBlockTermState termState, int flags) throws IOExc
       }
       docBufferUpto = BLOCK_SIZE;
       skipped = false;
+      prefetchedSkipData = false;
       return this;
     }
 
@@ -902,6 +917,13 @@ public int advance(int target) throws IOException {
           payloadByteUpto = skipper.getPayloadByteUpto();
         }
         nextSkipDoc = skipper.getNextSkipDoc();
+      } else {
+        // We don't need skip data yet, but we have evidence that advance() is used, so prefetch it
+        // in the background.
+        if (prefetchedSkipData == false) {
+          prefetchSkipData(docIn, docTermStartFP, skipOffset);
+          prefetchedSkipData = true;
+        }
       }
       if (docBufferUpto == BLOCK_SIZE) {
         refillDocs();
@@ -1097,7 +1119,9 @@ public BlockImpactsDocsEnum(FieldInfo fieldInfo, IntBlockTermState termState)
       this.docIn = Lucene99PostingsReader.this.docIn.clone();
 
       docFreq = termState.docFreq;
-      docIn.seek(termState.docStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      // Impacts almost certainly need skip data
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);
 
       doc = -1;
       accum = 0;
@@ -1318,7 +1342,8 @@ public BlockImpactsPostingsEnum(FieldInfo fieldInfo, IntBlockTermState termState
       posTermStartFP = termState.posStartFP;
       payTermStartFP = termState.payStartFP;
       totalTermFreq = termState.totalTermFreq;
-      docIn.seek(docTermStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);
       posPendingFP = posTermStartFP;
       posPendingCount = 0;
       if (termState.totalTermFreq < BLOCK_SIZE) {
@@ -1672,7 +1697,8 @@ public BlockImpactsEverythingEnum(FieldInfo fieldInfo, IntBlockTermState termSta
       posTermStartFP = termState.posStartFP;
       payTermStartFP = termState.payStartFP;
       totalTermFreq = termState.totalTermFreq;
-      docIn.seek(docTermStartFP);
+      seekAndPrefetchPostings(docIn, termState);
+      prefetchSkipData(docIn, termState.docStartFP, termState.skipOffset);
       posPendingFP = posTermStartFP;
       payPendingFP = payTermStartFP;
       posPendingCount = 0;
@@ -2049,6 +2075,41 @@ public long cost() {
     }
   }
 
+  private void seekAndPrefetchPostings(IndexInput docIn, IntBlockTermState state)
+      throws IOException {
+    if (docIn.getFilePointer() != state.docStartFP) {
+      // Don't prefetch if the input is already positioned at the right offset, which suggests that
+      // the caller is streaming the entire inverted index (e.g. for merging), let the read-ahead
+      // logic do its work instead. Note that this heuristic doesn't work for terms that have skip
+      // data, since skip data is stored after the last term, but handling all terms that have <128
+      // docs is a good start already.
+      docIn.seek(state.docStartFP);
+      if (state.skipOffset < 0) {
+        // This postings list is very short as it doesn't have skip data, prefetch the page that
+        // holds the first byte of the postings list.
+        docIn.prefetch(state.docStartFP, 1);
+      } else if (state.skipOffset <= MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH) {
+        // This postings list is short as it fits on a few pages, prefetch it all, plus one byte to
+        // make sure to include some skip data.
+        docIn.prefetch(state.docStartFP, state.skipOffset + 1);
+      } else {
+        // Default case: prefetch the page that holds the first byte of postings. We'll prefetch
+        // skip data when we have evidence that it is used.
+        docIn.prefetch(state.docStartFP, 1);
+      }
+    }
+    // Note: we don't prefetch positions or offsets, which are less likely to be needed.
+  }
+
+  private void prefetchSkipData(IndexInput docIn, long docStartFP, long skipOffset)
+      throws IOException {
+    if (skipOffset > MAX_POSTINGS_SIZE_FOR_FULL_PREFETCH) {
+      // If skipOffset is less than this value, skip data was already prefetched when doing
+      // #seekAndPrefetchPostings
+      docIn.prefetch(docStartFP + skipOffset, 1);
+    }
+  }
+
   @Override
   public void checkIntegrity() throws IOException {
     if (docIn != null) {