Skip to content

Commit

Permalink
Move group-varint encoding/decoding logic to DataOutput/DataInput (#1…
Browse files Browse the repository at this point in the history
  • Loading branch information
easyice authored Dec 23, 2023
1 parent 9359a9d commit dc9f154
Show file tree
Hide file tree
Showing 18 changed files with 451 additions and 194 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ Optimizations

* GITHUB#12552: Make FSTPostingsFormat load FSTs off-heap. (Tony X)

* GITHUB#12841: Move group-varint encoding/decoding logic to DataOutput/DataInput. (Adrien Grand, Zhang Chao, Uwe Schindler)

Bug Fixes
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,18 @@
import java.util.Arrays;
import java.util.Random;
import java.util.concurrent.TimeUnit;
import org.apache.lucene.codecs.lucene99.GroupVIntReader;
import org.apache.lucene.codecs.lucene99.GroupVIntWriter;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.ByteArrayDataOutput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.GroupVIntUtil;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
Expand Down Expand Up @@ -86,35 +89,49 @@ public class GroupVIntBenchmark {
final long[] values = new long[maxSize];

IndexInput byteBufferGVIntIn;
IndexInput nioGVIntIn;
IndexInput byteBufferVIntIn;
ByteBuffersDataInput byteBuffersGVIntIn;

ByteArrayDataInput byteArrayVIntIn;
ByteArrayDataInput byteArrayGVIntIn;

// @Param({"16", "32", "64", "128", "248"})
@Param({"64"})
public int size;

void initArrayInput(long[] docs) throws Exception {
byte[] gVIntBytes = new byte[Integer.BYTES * maxSize * 2];
byte[] vIntBytes = new byte[Integer.BYTES * maxSize * 2];
ByteArrayDataOutput vIntOut = new ByteArrayDataOutput(vIntBytes);
GroupVIntWriter w = new GroupVIntWriter();
w.writeValues(new ByteArrayDataOutput(gVIntBytes), docs, docs.length);
ByteArrayDataOutput out = new ByteArrayDataOutput(gVIntBytes);
out.writeGroupVInts(docs, docs.length);
for (long v : docs) {
vIntOut.writeVInt((int) v);
}
byteArrayVIntIn = new ByteArrayDataInput(vIntBytes);
byteArrayGVIntIn = new ByteArrayDataInput(gVIntBytes);
}

void initNioInput(long[] docs) throws Exception {
Directory dir = new NIOFSDirectory(Files.createTempDirectory("groupvintdata"));
IndexOutput out = dir.createOutput("gvint", IOContext.DEFAULT);
out.writeGroupVInts(docs, docs.length);
out.close();
nioGVIntIn = dir.openInput("gvint", IOContext.DEFAULT);
}

void initByteBuffersInput(long[] docs) throws Exception {
ByteBuffersDataOutput buffer = new ByteBuffersDataOutput();
buffer.writeGroupVInts(docs, docs.length);
byteBuffersGVIntIn = buffer.toDataInput();
}

void initByteBufferInput(long[] docs) throws Exception {
Directory dir = MMapDirectory.open(Files.createTempDirectory("groupvintdata"));
Directory dir = new MMapDirectory(Files.createTempDirectory("groupvintdata"));
IndexOutput vintOut = dir.createOutput("vint", IOContext.DEFAULT);
IndexOutput gvintOut = dir.createOutput("gvint", IOContext.DEFAULT);

GroupVIntWriter w = new GroupVIntWriter();
w.writeValues(gvintOut, docs, docs.length);
gvintOut.writeGroupVInts(docs, docs.length);
for (long v : docs) {
vintOut.writeVInt((int) v);
}
Expand All @@ -124,6 +141,16 @@ void initByteBufferInput(long[] docs) throws Exception {
byteBufferVIntIn = dir.openInput("vint", IOContext.DEFAULT);
}

private void readGroupVIntsBaseline(DataInput in, long[] dst, int limit) throws IOException {
int i;
for (i = 0; i <= limit - 4; i += 4) {
GroupVIntUtil.readGroupVInt(in, dst, i);
}
for (; i < limit; ++i) {
dst[i] = in.readVInt();
}
}

@Setup(Level.Trial)
public void init() throws Exception {
long[] docs = new long[maxSize];
Expand All @@ -140,10 +167,12 @@ public void init() throws Exception {
}
initByteBufferInput(docs);
initArrayInput(docs);
initNioInput(docs);
initByteBuffersInput(docs);
}

@Benchmark
public void byteBufferReadVInt(Blackhole bh) throws IOException {
public void benchMMapDirectoryInputs_readVInt(Blackhole bh) throws IOException {
byteBufferVIntIn.seek(0);
for (int i = 0; i < size; i++) {
values[i] = byteBufferVIntIn.readVInt();
Expand All @@ -152,14 +181,21 @@ public void byteBufferReadVInt(Blackhole bh) throws IOException {
}

@Benchmark
public void byteBufferReadGroupVInt(Blackhole bh) throws IOException {
public void benchMMapDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
GroupVIntReader.readValues(byteBufferGVIntIn, values, size);
byteBufferGVIntIn.readGroupVInts(values, size);
bh.consume(values);
}

@Benchmark
public void byteArrayReadVInt(Blackhole bh) {
public void benchMMapDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
byteBufferGVIntIn.seek(0);
this.readGroupVIntsBaseline(byteBufferGVIntIn, values, size);
bh.consume(values);
}

@Benchmark
public void benchByteArrayDataInput_readVInt(Blackhole bh) {
byteArrayVIntIn.rewind();
for (int i = 0; i < size; i++) {
values[i] = byteArrayVIntIn.readVInt();
Expand All @@ -168,9 +204,37 @@ public void byteArrayReadVInt(Blackhole bh) {
}

@Benchmark
public void byteArrayReadGroupVInt(Blackhole bh) throws IOException {
public void benchByteArrayDataInput_readGroupVInt(Blackhole bh) throws IOException {
byteArrayGVIntIn.rewind();
GroupVIntReader.readValues(byteArrayGVIntIn, values, size);
byteArrayGVIntIn.readGroupVInts(values, size);
bh.consume(values);
}

@Benchmark
public void benchNIOFSDirectoryInputs_readGroupVInt(Blackhole bh) throws IOException {
nioGVIntIn.seek(0);
nioGVIntIn.readGroupVInts(values, size);
bh.consume(values);
}

@Benchmark
public void benchNIOFSDirectoryInputs_readGroupVIntBaseline(Blackhole bh) throws IOException {
nioGVIntIn.seek(0);
this.readGroupVIntsBaseline(nioGVIntIn, values, size);
bh.consume(values);
}

@Benchmark
public void benchByteBuffersIndexInput_readGroupVInt(Blackhole bh) throws IOException {
byteBuffersGVIntIn.seek(0);
byteBuffersGVIntIn.readGroupVInts(values, size);
bh.consume(values);
}

@Benchmark
public void benchByteBuffersIndexInput_readGroupVIntBaseline(Blackhole bh) throws IOException {
byteBuffersGVIntIn.seek(0);
this.readGroupVIntsBaseline(byteBuffersGVIntIn, values, size);
bh.consume(values);
}
}

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ static void readVIntBlock(
boolean indexHasFreq,
boolean decodeFreq)
throws IOException {
GroupVIntReader.readValues(docIn, docBuffer, num);
docIn.readGroupVInts(docBuffer, num);
if (indexHasFreq && decodeFreq) {
for (int i = 0; i < num; ++i) {
freqBuffer[i] = docBuffer[i] & 0x01;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,6 @@ public final class Lucene99PostingsWriter extends PushPostingsWriterBase {
private final PForUtil pforUtil;
private final ForDeltaUtil forDeltaUtil;
private final Lucene99SkipWriter skipWriter;
private final GroupVIntWriter docGroupVIntWriter;

private boolean fieldHasNorms;
private NumericDocValues norms;
Expand Down Expand Up @@ -173,7 +172,6 @@ public Lucene99PostingsWriter(SegmentWriteState state) throws IOException {
skipWriter =
new Lucene99SkipWriter(
MAX_SKIP_LEVELS, BLOCK_SIZE, state.segmentInfo.maxDoc(), docOut, posOut, payOut);
docGroupVIntWriter = new GroupVIntWriter();
}

@Override
Expand Down Expand Up @@ -378,7 +376,7 @@ public void finishTerm(BlockTermState _state) throws IOException {
docDeltaBuffer[i] = (docDeltaBuffer[i] << 1) | (freqBuffer[i] == 1 ? 1 : 0);
}
}
docGroupVIntWriter.writeValues(docOut, docDeltaBuffer, docBufferUpto);
docOut.writeGroupVInts(docDeltaBuffer, docBufferUpto);
if (writeFreqs) {
for (int i = 0; i < docBufferUpto; i++) {
final int freq = (int) freqBuffer[i];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import org.apache.lucene.util.GroupVIntUtil;

/** Base implementation class for buffered {@link IndexInput}. */
public abstract class BufferedIndexInput extends IndexInput implements RandomAccessInput {
Expand Down Expand Up @@ -149,6 +150,16 @@ public final int readInt() throws IOException {
}
}

@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
final int len =
GroupVIntUtil.readGroupVInt(
this, buffer.remaining(), p -> buffer.getInt((int) p), buffer.position(), dst, offset);
if (len > 0) {
buffer.position(buffer.position() + len);
}
}

@Override
public final long readLong() throws IOException {
if (Long.BYTES <= buffer.remaining()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import java.util.Locale;
import java.util.stream.Collectors;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.GroupVIntUtil;
import org.apache.lucene.util.RamUsageEstimator;

/**
Expand Down Expand Up @@ -212,6 +213,25 @@ public long readLong() throws IOException {
}
}

@Override
protected void readGroupVInt(long[] dst, int offset) throws IOException {
final ByteBuffer block = blocks[blockIndex(pos)];
final int blockOffset = blockOffset(pos);
// We MUST save the return value to local variable, could not use pos += readGroupVInt(...).
// because `pos +=` in java will move current value(not address) of pos to register first,
// then call the function, but we will update pos value in function via readByte(), then
// `pos +=` will use an old pos value plus return value, thereby missing 1 byte.
final int len =
GroupVIntUtil.readGroupVInt(
this,
block.limit() - blockOffset,
p -> block.getInt((int) p),
blockOffset,
dst,
offset);
pos += len;
}

@Override
public long length() {
return length;
Expand Down
Loading

0 comments on commit dc9f154

Please sign in to comment.