Changed bulk iteration to use IntsRef instead.

apache · Mar 15, 2024 · 60495d4 · 60495d4
1 parent 8287e85
commit 60495d4
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 57 deletions.
diff --git a/lucene/core/src/java/org/apache/lucene/index/PointValues.java b/lucene/core/src/java/org/apache/lucene/index/PointValues.java
@@ -31,6 +31,7 @@
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
+import org.apache.lucene.util.IntsRef;
 import org.apache.lucene.util.bkd.BKDConfig;
 
 /**
@@ -298,6 +299,17 @@ default void visit(DocIdSetIterator iterator) throws IOException {
       }
     }
 
+    /**
+     * Bulk variant of {@link IntersectVisitor#visit(int)}: visits {@code ref.length} doc IDs from
+     * {@code ref.ints}, starting at {@code ref.offset}. Implementations may override this with an
+     * optimized version; even an equivalent override can be faster due to fewer virtual calls.
+     */
+    default void visit(IntsRef ref) throws IOException {
+      for (int pos = ref.offset, end = pos + ref.length; pos < end; pos++) {
+        visit(ref.ints[pos]);
+      }
+    }
+
     /**
      * Called for all documents in a leaf cell that crosses the query. The consumer should
      * scrutinize the packedValue to decide whether to accept it. In the 1D case, values are visited

diff --git a/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java b/lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
@@ -32,6 +32,7 @@
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.DocIdSetBuilder;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IntsRef;
 
 /**
  * Abstract class for range queries against single or multidimensional points such as {@link
@@ -185,6 +186,13 @@ public void visit(DocIdSetIterator iterator) throws IOException {
             adder.add(iterator);
           }
 
+          @Override
+          public void visit(IntsRef ref) {
+            for (int pos = ref.offset, end = pos + ref.length; pos < end; pos++) {
+              adder.add(ref.ints[pos]); // one add per doc ID in the slice
+            }
+          }
+
           @Override
           public void visit(int docID, byte[] packedValue) {
             if (matches(packedValue)) {
@@ -222,6 +230,14 @@ public void visit(DocIdSetIterator iterator) throws IOException {
             cost[0] = Math.max(0, cost[0] - iterator.cost());
           }
 
+          @Override
+          public void visit(IntsRef ref) {
+            for (int i = ref.offset; i < ref.offset + ref.length; i++) {
+              result.clear(ref.ints[i]); // drop each visited doc from the result set
+            }
+            cost[0] -= ref.length; // one per visited doc, applied in a single step
+          }
+
           @Override
           public void visit(int docID, byte[] packedValue) {
             if (matches(packedValue) == false) {

diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
@@ -23,6 +23,7 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.util.DocBaseBitSetIterator;
 import org.apache.lucene.util.FixedBitSet;
+import org.apache.lucene.util.IntsRef;
 
 final class DocIdsWriter {
 
@@ -36,64 +37,20 @@ final class DocIdsWriter {
 
   private final int[] scratch;
 
-  private final ScratchDocIdSetIterator scratchDocIdSetIterator = new ScratchDocIdSetIterator();
-
   /**
-   * DocIdSetIterator to be used to iterate over the scratch buffer. A single instance is reused to
-   * avoid re-allocating the object. The reset method should be called before each use with the
-   * count.
+   * IntsRef to be used to iterate over the scratch buffer. A single instance is reused to avoid
+   * re-allocating the object. The ints and length fields must be reset before each use.
    *
    * <p>The main reason for existing is to be able to call the {@link
-   * IntersectVisitor#visit(DocIdSetIterator)} method rather than the {@link
-   * IntersectVisitor#visit(int)} method. This seems to make a difference in performance, probably
-   * due to fewer virtual calls then happening (once per read call rather than once per doc).
+   * IntersectVisitor#visit(IntsRef)} method rather than the {@link IntersectVisitor#visit(int)}
+   * method. This seems to make a difference in performance, probably because fewer virtual calls
+   * are made (once per read call rather than once per doc).
    */
-  private class ScratchDocIdSetIterator extends DocIdSetIterator {
-
-    private int index = -1;
-    private int count = -1;
-
-    @Override
-    public int docID() {
-      if (index < 0) {
-        return -1;
-      }
-      if (index >= count) {
-        return NO_MORE_DOCS;
-      }
-      return scratch[index];
-    }
-
-    @Override
-    public int nextDoc() throws IOException {
-      index++;
-      if (index >= count) {
-        return NO_MORE_DOCS;
-      }
-      return scratch[index];
-    }
+  private final IntsRef scratchIntsRef = new IntsRef();
 
-    @Override
-    public int advance(int target) throws IOException {
-      while (index < count && scratch[index] < target) {
-        index++;
-      }
-      if (index >= count) {
-        return NO_MORE_DOCS;
-      } else {
-        return scratch[index];
-      }
-    }
-
-    @Override
-    public long cost() {
-      return count;
-    }
-
-    void reset(int count) {
-      this.count = count;
-      this.index = -1;
-    }
+  {
+    // Set explicitly rather than relying on the IntsRef default constructor to set offset to 0
+    scratchIntsRef.offset = 0;
   }
 
   DocIdsWriter(int maxPointsInLeaf) {
@@ -378,8 +335,9 @@ private static void readLegacyDeltaVInts(IndexInput in, int count, IntersectVisi
 
   private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
     readDelta16(in, count, scratch);
-    scratchDocIdSetIterator.reset(count);
-    visitor.visit(scratchDocIdSetIterator);
+    scratchIntsRef.ints = scratch;
+    scratchIntsRef.length = count;
+    visitor.visit(scratchIntsRef);
   }
 
   private static void readInts24(IndexInput in, int count, IntersectVisitor visitor)
@@ -405,7 +363,8 @@ private static void readInts24(IndexInput in, int count, IntersectVisitor visito
 
   private void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
     in.readInts(scratch, 0, count);
-    scratchDocIdSetIterator.reset(count);
-    visitor.visit(scratchDocIdSetIterator);
+    scratchIntsRef.ints = scratch;
+    scratchIntsRef.length = count;
+    visitor.visit(scratchIntsRef);
   }
 }