Skip to content

Commit

Permalink
ToParentBlockJoin[Byte|Float]KnnVectorQuery needs to handle the case …
Browse files Browse the repository at this point in the history
…when parents are missing (#12504)

This is a follow up to: #12434

Adds a test for when parents are missing in the index and verifies that we return no hits. Previously this would have thrown a NullPointerException (NPE).
  • Loading branch information
benwtrent committed Aug 14, 2023
1 parent e888dbe commit 14e0947
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ protected TopDocs exactSearch(LeafReaderContext context, DocIdSetIterator accept
return null;
}
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
ParentBlockJoinByteVectorScorer vectorScorer =
new ParentBlockJoinByteVectorScorer(
context.reader().getByteVectorValues(field),
Expand Down Expand Up @@ -112,6 +115,9 @@ protected TopDocs exactSearch(LeafReaderContext context, DocIdSetIterator accept
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
throws IOException {
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
return collector.topDocs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ protected TopDocs exactSearch(LeafReaderContext context, DocIdSetIterator accept
return null;
}
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
ParentBlockJoinFloatVectorScorer vectorScorer =
new ParentBlockJoinFloatVectorScorer(
context.reader().getFloatVectorValues(field),
Expand Down Expand Up @@ -114,6 +117,9 @@ protected TopDocs exactSearch(LeafReaderContext context, DocIdSetIterator accept
protected TopDocs approximateSearch(LeafReaderContext context, Bits acceptDocs, int visitedLimit)
throws IOException {
BitSet parentBitSet = parentsFilter.getBitSet(context);
if (parentBitSet == null) {
return NO_RESULTS;
}
KnnCollector collector = new ToParentJoinKnnCollector(k, visitedLimit, parentBitSet);
context.reader().searchNearestVectors(field, query, collector, acceptDocs);
return collector.topDocs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,78 @@ public void testEmptyIndex() throws IOException {
}
}

// Searches an index containing neither vectors nor parent documents and expects both the
// unfiltered and the filtered parent-join kNN query to come back empty.
public void testIndexWithNoVectorsNorParents() throws IOException {
  try (Directory dir = newDirectory()) {
    // Index five documents that carry only a plain string field (no vector, no "docType").
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      int remaining = 5;
      while (remaining-- > 0) {
        Document plainDoc = new Document();
        plainDoc.add(new StringField("other", "value", Field.Store.NO));
        writer.addDocument(plainDoc);
      }
    }
    try (IndexReader indexReader = DirectoryReader.open(dir)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      // The parent filter is built by hand here. Per the original note, other tests go through
      // a "check" step that verifies parent ids exist, but production callers may not; no
      // indexed document has a "docType" field, so this filter matches nothing and the query
      // is expected to handle that gracefully.
      BitSetProducer parents =
          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));

      TopDocs unfilteredHits =
          indexSearcher.search(
              getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parents), 3);
      assertEquals(0, unfilteredHits.totalHits.value);
      assertEquals(0, unfilteredHits.scoreDocs.length);

      // Same search again with a match-all filter and a larger k; the original test notes
      // this combination is meant to exercise the exact-search path.
      TopDocs filteredHits =
          indexSearcher.search(
              getParentJoinKnnQuery(
                  "field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parents),
              3);
      assertEquals(0, filteredHits.totalHits.value);
      assertEquals(0, filteredHits.scoreDocs.length);
    }
  }
}

// Searches an index that has vector documents but no parent documents and expects both the
// unfiltered and the filtered parent-join kNN query to come back empty.
public void testIndexWithNoParents() throws IOException {
  try (Directory dir = newDirectory()) {
    try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      // Three documents that each carry a vector and a stored id, but no "docType" field.
      for (int docId = 0; docId < 3; docId++) {
        Document vectorDoc = new Document();
        vectorDoc.add(getKnnVectorField("field", new float[] {2, 2}));
        vectorDoc.add(newStringField("id", String.valueOf(docId), Field.Store.YES));
        writer.addDocument(vectorDoc);
      }
      // Five more documents that carry only a plain string field and no vector.
      for (int extra = 0; extra < 5; extra++) {
        Document plainDoc = new Document();
        plainDoc.add(new StringField("other", "value", Field.Store.NO));
        writer.addDocument(plainDoc);
      }
    }
    try (IndexReader indexReader = DirectoryReader.open(dir)) {
      IndexSearcher indexSearcher = new IndexSearcher(indexReader);
      // The parent filter is built by hand here. Per the original note, other tests go through
      // a "check" step that verifies parent ids exist, but production callers may not; no
      // indexed document has a "docType" field, so this filter matches nothing and the query
      // is expected to handle that gracefully.
      BitSetProducer parents =
          new QueryBitSetProducer(new TermQuery(new Term("docType", "_parent")));

      TopDocs unfilteredHits =
          indexSearcher.search(
              getParentJoinKnnQuery("field", new float[] {2, 2}, null, 3, parents), 3);
      assertEquals(0, unfilteredHits.totalHits.value);
      assertEquals(0, unfilteredHits.scoreDocs.length);

      // Same search again with a match-all filter and a larger k; the original test notes
      // this combination is meant to exercise the exact-search path.
      TopDocs filteredHits =
          indexSearcher.search(
              getParentJoinKnnQuery(
                  "field", new float[] {2, 2}, new MatchAllDocsQuery(), 10, parents),
              3);
      assertEquals(0, filteredHits.totalHits.value);
      assertEquals(0, filteredHits.scoreDocs.length);
    }
  }
}

public void testFilterWithNoVectorMatches() throws IOException {
try (Directory indexStore =
getIndexStore("field", new float[] {0, 1}, new float[] {1, 2}, new float[] {0, 0});
Expand Down

0 comments on commit 14e0947

Please sign in to comment.