Skip to content

Commit

Permalink
Changed bulk iteration to use IntsRef instead.
Browse files Browse the repository at this point in the history
  • Loading branch information
antonha committed Mar 15, 2024
1 parent 8287e85 commit 60495d4
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 57 deletions.
12 changes: 12 additions & 0 deletions lucene/core/src/java/org/apache/lucene/index/PointValues.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.ArrayUtil.ByteArrayComparator;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.bkd.BKDConfig;

/**
Expand Down Expand Up @@ -298,6 +299,17 @@ default void visit(DocIdSetIterator iterator) throws IOException {
}
}

/**
 * Bulk variant of {@link IntersectVisitor#visit(int)}: visits every doc ID in the slice of
 * {@code ref.ints} that starts at {@code ref.offset} and spans {@code ref.length} entries.
 *
 * <p>This default implementation simply calls {@link IntersectVisitor#visit(int)} once per doc
 * ID, in slice order. Implementations may override it with their own optimizations. Even if an
 * implementation does the same thing, this can be a speed improvement due to fewer virtual
 * calls.
 *
 * @param ref the slice of doc IDs to visit
 * @throws IOException if {@link IntersectVisitor#visit(int)} throws it for one of the doc IDs
 */
default void visit(IntsRef ref) throws IOException {
  // Compute the exclusive end of the slice once rather than on every iteration.
  final int end = ref.offset + ref.length;
  for (int i = ref.offset; i < end; i++) {
    visit(ref.ints[i]);
  }
}

/**
* Called for all documents in a leaf cell that crosses the query. The consumer should
* scrutinize the packedValue to decide whether to accept it. In the 1D case, values are visited
Expand Down
16 changes: 16 additions & 0 deletions lucene/core/src/java/org/apache/lucene/search/PointRangeQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.DocIdSetBuilder;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;

/**
* Abstract class for range queries against single or multidimensional points such as {@link
Expand Down Expand Up @@ -185,6 +186,13 @@ public void visit(DocIdSetIterator iterator) throws IOException {
adder.add(iterator);
}

@Override
public void visit(IntsRef ref) {
  // Hand each doc ID in the slice [ref.offset, ref.offset + ref.length) to the adder.
  final int[] docs = ref.ints;
  final int end = ref.offset + ref.length;
  int pos = ref.offset;
  while (pos < end) {
    adder.add(docs[pos]);
    pos++;
  }
}

@Override
public void visit(int docID, byte[] packedValue) {
if (matches(packedValue)) {
Expand Down Expand Up @@ -222,6 +230,14 @@ public void visit(DocIdSetIterator iterator) throws IOException {
cost[0] = Math.max(0, cost[0] - iterator.cost());
}

@Override
public void visit(IntsRef ref) throws IOException {
  // For each doc ID in the slice [ref.offset, ref.offset + ref.length): clear it from the
  // result and lower the running cost by one.
  final int[] docs = ref.ints;
  final int end = ref.offset + ref.length;
  int pos = ref.offset;
  while (pos < end) {
    result.clear(docs[pos]);
    cost[0]--;
    pos++;
  }
}

@Override
public void visit(int docID, byte[] packedValue) {
if (matches(packedValue) == false) {
Expand Down
73 changes: 16 additions & 57 deletions lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.DocBaseBitSetIterator;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;

final class DocIdsWriter {

Expand All @@ -36,64 +37,20 @@ final class DocIdsWriter {

private final int[] scratch;

private final ScratchDocIdSetIterator scratchDocIdSetIterator = new ScratchDocIdSetIterator();

/**
* DocIdSetIterator to be used to iterate over the scratch buffer. A single instance is reused to
* avoid re-allocating the object. The reset method should be called before each use with the
* count.
* IntsRef to be used to iterate over the scratch buffer. A single instance is reused to avoid
* re-allocating the object. The ints and length fields need to be reset each use.
*
* <p>The main reason for existing is to be able to call the {@link
* IntersectVisitor#visit(DocIdSetIterator)} method rather than the {@link
* IntersectVisitor#visit(int)} method. This seems to make a difference in performance, probably
* due to fewer virtual calls then happening (once per read call rather than once per doc).
* IntersectVisitor#visit(IntsRef)} method rather than the {@link IntersectVisitor#visit(int)}
* method. This seems to make a difference in performance, probably due to fewer virtual calls
* then happening (once per read call rather than once per doc).
*/
private class ScratchDocIdSetIterator extends DocIdSetIterator {

private int index = -1;
private int count = -1;

@Override
public int docID() {
if (index < 0) {
return -1;
}
if (index >= count) {
return NO_MORE_DOCS;
}
return scratch[index];
}

@Override
public int nextDoc() throws IOException {
index++;
if (index >= count) {
return NO_MORE_DOCS;
}
return scratch[index];
}
private final IntsRef scratchIntsRef = new IntsRef();

@Override
public int advance(int target) throws IOException {
while (index < count && scratch[index] < target) {
index++;
}
if (index >= count) {
return NO_MORE_DOCS;
} else {
return scratch[index];
}
}

@Override
public long cost() {
return count;
}

void reset(int count) {
this.count = count;
this.index = -1;
}
{
// This is here to not rely on the default constructor of IntsRef to set offset to 0
scratchIntsRef.offset = 0;
}

DocIdsWriter(int maxPointsInLeaf) {
Expand Down Expand Up @@ -378,8 +335,9 @@ private static void readLegacyDeltaVInts(IndexInput in, int count, IntersectVisi

private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
readDelta16(in, count, scratch);
scratchDocIdSetIterator.reset(count);
visitor.visit(scratchDocIdSetIterator);
scratchIntsRef.ints = scratch;
scratchIntsRef.length = count;
visitor.visit(scratchIntsRef);
}

private static void readInts24(IndexInput in, int count, IntersectVisitor visitor)
Expand All @@ -405,7 +363,8 @@ private static void readInts24(IndexInput in, int count, IntersectVisitor visito

private void readInts32(IndexInput in, int count, IntersectVisitor visitor) throws IOException {
in.readInts(scratch, 0, count);
scratchDocIdSetIterator.reset(count);
visitor.visit(scratchDocIdSetIterator);
scratchIntsRef.ints = scratch;
scratchIntsRef.length = count;
visitor.visit(scratchIntsRef);
}
}

0 comments on commit 60495d4

Please sign in to comment.