Skip to content

Commit

Permalink
BaseVectorSimilarityQueryTestCase assumes connected hnsw graph (#13260)
Browse files Browse the repository at this point in the history
  • Loading branch information
msokolov authored and benwtrent committed Jun 29, 2024
1 parent e8bf83f commit 030f15f
Show file tree
Hide file tree
Showing 5 changed files with 163 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.hnsw.HnswTestUtil;

@LuceneTestCase.SuppressCodecs("SimpleText")
abstract class BaseVectorSimilarityQueryTestCase<
Expand Down Expand Up @@ -165,6 +166,7 @@ public void testRandomFilter() throws IOException {

try (Directory indexStore = getIndexStore(getRandomVectors(numDocs, dim));
IndexReader reader = DirectoryReader.open(indexStore)) {
assumeTrue("graph is disconnected", HnswTestUtil.graphIsConnected(reader, vectorField));
IndexSearcher searcher = newSearcher(reader);

Query query =
Expand Down Expand Up @@ -289,6 +291,7 @@ public void testSomeDeletes() throws IOException {
w.commit();

try (IndexReader reader = DirectoryReader.open(indexStore)) {
assumeTrue("graph is disconnected", HnswTestUtil.graphIsConnected(reader, vectorField));
IndexSearcher searcher = newSearcher(reader);

Query query =
Expand Down
1 change: 1 addition & 0 deletions lucene/test-framework/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
exports org.apache.lucene.tests.store;
exports org.apache.lucene.tests.util.automaton;
exports org.apache.lucene.tests.util.fst;
exports org.apache.lucene.tests.util.hnsw;
exports org.apache.lucene.tests.util;

provides org.apache.lucene.codecs.Codec with
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
package org.apache.lucene.tests.codecs.asserting;

import java.io.IOException;
import org.apache.lucene.codecs.HnswGraphProvider;
import org.apache.lucene.codecs.KnnFieldVectorsWriter;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.KnnVectorsReader;
Expand All @@ -34,6 +35,7 @@
import org.apache.lucene.search.KnnCollector;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.hnsw.HnswGraph;

/** Wraps the default KnnVectorsFormat and provides additional assertions. */
public class AssertingKnnVectorsFormat extends KnnVectorsFormat {
Expand Down Expand Up @@ -95,7 +97,7 @@ public long ramBytesUsed() {
}
}

static class AssertingKnnVectorsReader extends KnnVectorsReader {
static class AssertingKnnVectorsReader extends KnnVectorsReader implements HnswGraphProvider {
final KnnVectorsReader delegate;
final FieldInfos fis;

Expand Down Expand Up @@ -168,5 +170,10 @@ public void close() throws IOException {
public long ramBytesUsed() {
return delegate.ramBytesUsed();
}

/**
 * Returns the HNSW graph for {@code field} by forwarding the request to the wrapped reader.
 *
 * <p>The wrapped reader is cast to {@link HnswGraphProvider} unconditionally, so a delegate that
 * does not implement that interface results in a {@link ClassCastException}.
 *
 * @param field name of the vector field whose graph is requested
 * @return whatever the delegate's {@code getGraph(field)} returns
 * @throws IOException if the delegate fails while reading the graph
 */
@Override
public HnswGraph getGraph(String field) throws IOException {
  final HnswGraphProvider graphProvider = (HnswGraphProvider) delegate;
  return graphProvider.getGraph(field);
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.util.hnsw;

import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS;

import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import org.apache.lucene.codecs.HnswGraphProvider;
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.hnsw.HnswGraph;

/** Utilities for use in tests involving HNSW graphs */
public class HnswTestUtil {

  /**
   * Returns true iff level 0 of the graph is fully connected - that is, the traversal used by
   * {@link #componentSizes(HnswGraph)} finds fewer than two components. An empty graph yields no
   * components and is therefore reported as connected.
   *
   * @param knnValues the graph to inspect
   * @return {@code true} if level 0 consists of zero or one component
   * @throws IOException if reading the graph fails
   */
  public static boolean isFullyConnected(HnswGraph knnValues) throws IOException {
    return componentSizes(knnValues).size() < 2;
  }

  /**
   * Returns the sizes of the distinct graph components on level 0. If the graph is fully-connected
   * there will only be a single component. If the graph is empty, the returned list will be empty.
   *
   * <p>Components are discovered by repeatedly starting a traversal at the lowest-numbered node
   * that has not been visited yet and following neighbor links on level 0; nodes already claimed
   * by an earlier traversal are not counted again.
   *
   * @param hnsw the graph to inspect
   * @return one entry per discovered component, in discovery order; the entries sum to {@code
   *     hnsw.size()}
   * @throws IOException if reading the graph fails
   */
  public static List<Integer> componentSizes(HnswGraph hnsw) throws IOException {
    List<Integer> sizes = new ArrayList<>();
    FixedBitSet connectedNodes = new FixedBitSet(hnsw.size());
    assert hnsw.size() == hnsw.getNodesOnLevel(0).size();
    int total = 0;
    // Every traversal marks at least one new node, so this terminates once all are marked.
    while (total < connectedNodes.length()) {
      int componentSize = traverseConnectedNodes(hnsw, connectedNodes);
      assert componentSize > 0;
      sizes.add(componentSize);
      total += componentSize;
    }
    return sizes;
  }

  /**
   * Counts the nodes reachable on level 0 from the lowest-numbered node whose bit is still clear
   * in {@code connectedNodes}, setting the bit of every node it visits.
   *
   * @param hnswGraph the graph to traverse
   * @param connectedNodes nodes visited so far; updated in place
   * @return the number of newly visited nodes, or 0 if every node was already visited
   * @throws IOException if reading the graph fails
   */
  private static int traverseConnectedNodes(HnswGraph hnswGraph, FixedBitSet connectedNodes)
      throws IOException {
    // Start at the first unvisited node and search all nodes reachable from it on this level
    int entryPoint = nextClearBit(connectedNodes, 0);
    if (entryPoint == NO_MORE_DOCS) {
      return 0;
    }
    Deque<Integer> stack = new ArrayDeque<>();
    stack.push(entryPoint);
    int count = 0;
    while (!stack.isEmpty()) {
      int node = stack.pop();
      // A node can be pushed more than once before it is first popped, so re-check here.
      if (connectedNodes.get(node)) {
        continue;
      }
      count++;
      connectedNodes.set(node);
      hnswGraph.seek(0, node);
      int friendOrd;
      while ((friendOrd = hnswGraph.nextNeighbor()) != NO_MORE_DOCS) {
        // Skip neighbors that are already visited: keeps the stack from growing with every edge.
        if (connectedNodes.get(friendOrd) == false) {
          stack.push(friendOrd);
        }
      }
    }
    return count;
  }

  /**
   * Returns the index of the first clear bit at or after {@code index}, or {@code NO_MORE_DOCS} if
   * all bits in {@code [index, bits.length())} are set.
   *
   * @param bits the bit set to scan
   * @param index first position to examine; must be in {@code [0, bits.length())}
   */
  private static int nextClearBit(FixedBitSet bits, int index) {
    // Does not depend on the ghost bits being clear!
    long[] barray = bits.getBits();
    assert index >= 0 && index < bits.length() : "index=" + index + ", numBits=" + bits.length();
    int i = index >> 6;
    // Java uses only the low 6 bits of the shift count for a long, so this drops the bits of
    // word i that lie to the right of index.
    long word = ~(barray[i] >> index);

    int next = NO_MORE_DOCS;
    if (word != 0) {
      next = index + Long.numberOfTrailingZeros(word);
    } else {
      while (++i < barray.length) {
        word = ~barray[i];
        if (word != 0) {
          next = (i << 6) + Long.numberOfTrailingZeros(word);
          break;
        }
      }
    }
    // Bits at or beyond length() (including ghost bits in the last word) are not real clear bits;
    // this bound must be applied to the first word as well as to the later ones.
    if (next >= bits.length()) {
      return NO_MORE_DOCS;
    }
    return next;
  }

  /**
   * Returns true iff, in every leaf of {@code reader}, level 0 of the HNSW graph for {@code
   * vectorField} is fully connected according to {@link #isFullyConnected(HnswGraph)}.
   *
   * <p>Each leaf is unwrapped and cast to {@link CodecReader}, its vector reader is cast to {@link
   * PerFieldKnnVectorsFormat.FieldsReader}, and the per-field reader is cast to {@link
   * HnswGraphProvider}; a {@link ClassCastException} is thrown if any of these casts does not
   * apply.
   *
   * @param reader the index reader whose leaves are inspected
   * @param vectorField name of the vector field
   * @return {@code false} as soon as one leaf has a disconnected graph, {@code true} otherwise
   *     (including when the reader has no leaves)
   * @throws IOException if reading a graph fails
   */
  public static boolean graphIsConnected(IndexReader reader, String vectorField)
      throws IOException {
    for (LeafReaderContext ctx : reader.leaves()) {
      CodecReader codecReader = (CodecReader) FilterLeafReader.unwrap(ctx.reader());
      HnswGraph graph =
          ((HnswGraphProvider)
                  ((PerFieldKnnVectorsFormat.FieldsReader) codecReader.getVectorReader())
                      .getFieldReader(vectorField))
              .getGraph(vectorField);
      if (isFullyConnected(graph) == false) {
        return false;
      }
    }
    return true;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/** Support for HNSW testing. */
package org.apache.lucene.tests.util.hnsw;

0 comments on commit 030f15f

Please sign in to comment.