Skip to content

Commit

Permalink
Fix bug in SQ when just a single vector present in a segment (apache#…
Browse files Browse the repository at this point in the history
…13374)

This commit fixes a corner case in the ScalarQuantizer when just a single vector is present. I ran into this when updating a test that previously passed successfully with Lucene 9.10 but fails in 9.x.

The score error correction is calculated to be NaN, as there are no score docs or variance.
  • Loading branch information
ChrisHegarty authored May 16, 2024
1 parent 731cecf commit 3d671a0
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 1 deletion.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,8 @@ Bug Fixes

* GITHUB#13378: Fix points writing with no values (Chris Hegarty)

* GITHUB#13374: Fix bug in SQ when just a single vector present in a segment (Chris Hegarty)

Build
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,7 @@ public ScoreErrorCorrelator(
}
corr.add(1 - errors.var() / scoreVariance);
}
return corr.mean;
return Double.isNaN(corr.mean) ? 0.0 : corr.mean;
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,14 @@
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
Expand Down Expand Up @@ -252,6 +254,59 @@ private static void indexVectors(
}
}

/** Runs {@link #testSingleVectorPerSegment} using {@link VectorSimilarityFunction#COSINE}. */
public void testSingleVectorPerSegmentCosine() throws IOException {
  final VectorSimilarityFunction similarity = VectorSimilarityFunction.COSINE;
  testSingleVectorPerSegment(similarity);
}

/** Runs {@link #testSingleVectorPerSegment} using {@link VectorSimilarityFunction#DOT_PRODUCT}. */
public void testSingleVectorPerSegmentDot() throws IOException {
  final VectorSimilarityFunction similarity = VectorSimilarityFunction.DOT_PRODUCT;
  testSingleVectorPerSegment(similarity);
}

/** Runs {@link #testSingleVectorPerSegment} using {@link VectorSimilarityFunction#EUCLIDEAN}. */
public void testSingleVectorPerSegmentEuclidean() throws IOException {
  final VectorSimilarityFunction similarity = VectorSimilarityFunction.EUCLIDEAN;
  testSingleVectorPerSegment(similarity);
}

/**
 * Runs {@link #testSingleVectorPerSegment} using {@link
 * VectorSimilarityFunction#MAXIMUM_INNER_PRODUCT}.
 */
public void testSingleVectorPerSegmentMIP() throws IOException {
  final VectorSimilarityFunction similarity = VectorSimilarityFunction.MAXIMUM_INNER_PRODUCT;
  testSingleVectorPerSegment(similarity);
}

/**
 * Indexes three two-dimensional vectors, committing after each document, force-merges the index
 * down to one segment and verifies that a nearest-vector search returns all three documents in
 * the expected order.
 *
 * <p>The query vector is identical to document "B", and document "C" is its exact negation, so
 * the expected ranking is B, A, C.
 *
 * @param sim the similarity function the vector field is indexed with
 * @throws IOException if indexing or searching fails
 */
private void testSingleVectorPerSegment(VectorSimilarityFunction sim) throws IOException {
  // NOTE(review): arguments presumably select 7-bit quantization without compression; confirm
  // against getCodec.
  var codec = getCodec(7, false);
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig().setCodec(codec))) {
      // Commit after every document; presumably this leaves each vector alone in its own segment
      // before the merge, which is the corner case this test targets.
      Document docA = new Document();
      docA.add(new KnnFloatVectorField("field", new float[] {0.8f, 0.6f}, sim));
      docA.add(newTextField("id", "A", Field.Store.YES));
      writer.addDocument(docA);
      writer.commit();

      Document docB = new Document();
      docB.add(new KnnFloatVectorField("field", new float[] {0.6f, 0.8f}, sim));
      docB.add(newTextField("id", "B", Field.Store.YES));
      writer.addDocument(docB);
      writer.commit();

      Document docC = new Document();
      docC.add(new KnnFloatVectorField("field", new float[] {-0.6f, -0.8f}, sim));
      docC.add(newTextField("id", "C", Field.Store.YES));
      writer.addDocument(docC);
      writer.commit();

      writer.forceMerge(1);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      LeafReader leafReader = getOnlyLeafReader(reader);
      // Doc IDs in the hits come from the leaf search, so resolve stored fields on that leaf too.
      StoredFields storedFields = leafReader.storedFields();
      float[] queryVector = new float[] {0.6f, 0.8f};
      var hits = leafReader.searchNearestVectors("field", queryVector, 3, null, 100);
      // Expected value first, so a failure message reports the counts the right way round.
      assertEquals(3, hits.scoreDocs.length);
      assertEquals("B", storedFields.document(hits.scoreDocs[0].doc).get("id"));
      assertEquals("A", storedFields.document(hits.scoreDocs[1].doc).get("id"));
      assertEquals("C", storedFields.document(hits.scoreDocs[2].doc).get("id"));
    }
  }
}

/**
 * Encodes a float as its four-byte little-endian representation.
 *
 * @param value the float to encode
 * @return a new four-byte array holding {@code value} in little-endian byte order
 */
private static byte[] floatToByteArray(float value) {
  final ByteBuffer buffer = ByteBuffer.allocate(Float.BYTES);
  buffer.order(ByteOrder.LITTLE_ENDIAN);
  buffer.putFloat(value);
  return buffer.array();
}
Expand Down

0 comments on commit 3d671a0

Please sign in to comment.