Skip to content

Commit

Permalink
KNN80DocValues should only be considered for BinaryDocValues fields (o…
Browse files Browse the repository at this point in the history
…pensearch-project#2147)

Consider adding files from fields that has BinaryDocValues and doesn't
have filter values in producer.

Signed-off-by: Vijayan Balasubramanian <balasvij@amazon.com>
  • Loading branch information
VijayanB authored Sep 26, 2024
1 parent a1227ea commit 329fc57
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Add short circuit if no live docs are in segments [#2059](https://github.com/opensearch-project/k-NN/pull/2059)
* Update Default Rescore Context based on Dimension [#2149](https://github.com/opensearch-project/k-NN/pull/2149)
### Bug Fixes
* KNN80DocValues should only be considered for BinaryDocValues fields [#2147](https://github.com/opensearch-project/k-NN/pull/2147)
### Infrastructure
### Documentation
### Maintenance
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import lombok.extern.log4j.Log4j2;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
Expand Down Expand Up @@ -69,6 +70,13 @@ public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state
if (!field.attributes().containsKey(KNN_FIELD)) {
continue;
}
// Only segments that contains BinaryDocValues and doesn't have vector values should be considered.
// By default, we don't create BinaryDocValues for knn field anymore. However, users can set doc_values = true
// to create binary doc values explicitly like any other field. Hence, we only want to include fields
// where approximate search is possible only by BinaryDocValues.
if (field.getDocValuesType() != DocValuesType.BINARY || field.hasVectorValues() == true) {
continue;
}
// Only Native Engine put into indexPathMap
KNNEngine knnEngine = getNativeKNNEngine(field);
if (knnEngine == null) {
Expand All @@ -77,6 +85,7 @@ public KNN80DocValuesProducer(DocValuesProducer delegate, SegmentReadState state
List<String> engineFiles = KNNCodecUtil.getEngineFiles(knnEngine.getExtension(), field.name, state.segmentInfo);
Path indexPath = PathUtils.get(directoryPath, engineFiles.get(0));
indexPathMap.putIfAbsent(field.getName(), indexPath.toString());

}
}

Expand Down
4 changes: 4 additions & 0 deletions src/test/java/org/opensearch/knn/index/OpenSearchIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import java.util.Locale;
import lombok.SneakyThrows;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.opensearch.knn.KNNRestTestCase;
import org.opensearch.knn.KNNResult;
import org.apache.hc.core5.http.io.entity.EntityUtils;
Expand Down Expand Up @@ -483,6 +484,9 @@ public void testIndexingVectorValidation_updateVectorWithNull() throws Exception
assertArrayEquals(vectorForDocumentOne, vectorRestoreInitialValue);
}

// This doesn't work since indices that are created post 2.17 don't evict by default when indices are closed or deleted.
// Enable this PR once https://github.com/opensearch-project/k-NN/issues/2148 is resolved.
@Ignore
public void testCacheClear_whenCloseIndex() throws Exception {
String indexName = "test-index-1";
KNNEngine knnEngine1 = KNNEngine.NMSLIB;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
Expand Down Expand Up @@ -127,4 +128,57 @@ public void testProduceKNNBinaryField_fromCodec_nmslibCurrent() throws IOExcepti
assertTrue(path.contains(segmentFiles.get(0)));
}

public void testProduceKNNBinaryField_whenFieldHasNonBinaryDocValues_thenSkipThoseField() throws IOException {
// Set information about the segment and the fields
DocValuesFormat mockDocValuesFormat = mock(DocValuesFormat.class);
Codec mockDelegateCodec = mock(Codec.class);
DocValuesProducer mockDocValuesProducer = mock(DocValuesProducer.class);
when(mockDelegateCodec.docValuesFormat()).thenReturn(mockDocValuesFormat);
when(mockDocValuesFormat.fieldsProducer(any())).thenReturn(mockDocValuesProducer);
when(mockDocValuesFormat.getName()).thenReturn("mockDocValuesFormat");
Codec codec = new KNN87Codec(mockDelegateCodec);

String segmentName = "_test";
int docsInSegment = 100;
String fieldName1 = String.format("test_field1%s", randomAlphaOfLength(4));
String fieldName2 = String.format("test_field2%s", randomAlphaOfLength(4));
List<String> segmentFiles = Arrays.asList(
String.format("%s_2011_%s%s", segmentName, fieldName1, KNNEngine.NMSLIB.getExtension()),
String.format("%s_165_%s%s", segmentName, fieldName2, KNNEngine.FAISS.getExtension())
);

KNNEngine knnEngine = KNNEngine.NMSLIB;
SpaceType spaceType = SpaceType.COSINESIMIL;
SegmentInfo segmentInfo = KNNCodecTestUtil.segmentInfoBuilder()
.directory(directory)
.segmentName(segmentName)
.docsInSegment(docsInSegment)
.codec(codec)
.build();

for (String name : segmentFiles) {
IndexOutput indexOutput = directory.createOutput(name, IOContext.DEFAULT);
indexOutput.close();
}
segmentInfo.setFiles(segmentFiles);

FieldInfo[] fieldInfoArray = new FieldInfo[] {
KNNCodecTestUtil.FieldInfoBuilder.builder(fieldName1)
.addAttribute(KNNVectorFieldMapper.KNN_FIELD, "true")
.addAttribute(KNNConstants.KNN_ENGINE, knnEngine.getName())
.addAttribute(KNNConstants.SPACE_TYPE, spaceType.getValue())
.docValuesType(DocValuesType.NONE)
.dvGen(-1)
.build() };

FieldInfos fieldInfos = new FieldInfos(fieldInfoArray);
SegmentReadState state = new SegmentReadState(directory, segmentInfo, fieldInfos, IOContext.DEFAULT);

DocValuesFormat docValuesFormat = codec.docValuesFormat();
assertTrue(docValuesFormat instanceof KNN80DocValuesFormat);
DocValuesProducer producer = docValuesFormat.fieldsProducer(state);
assertTrue(producer instanceof KNN80DocValuesProducer);
assertEquals(0, ((KNN80DocValuesProducer) producer).getOpenedIndexPath().size());
}

}

0 comments on commit 329fc57

Please sign in to comment.