From 91c91e100244e41348092faf27cbffe7354261ed Mon Sep 17 00:00:00 2001 From: Sarthak Nandi Date: Mon, 5 Aug 2024 13:32:05 -0700 Subject: [PATCH] Increase vector size limit to 4096 --- .../luceneserver/field/VectorFieldDef.java | 46 +++++++++++++------ .../field/VectorFieldDefTest.java | 8 ++-- 2 files changed, 35 insertions(+), 19 deletions(-) diff --git a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDef.java b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDef.java index cd72b4ea1..aca23704c 100644 --- a/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDef.java +++ b/src/main/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDef.java @@ -30,6 +30,8 @@ import java.util.List; import java.util.Map; import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; @@ -38,6 +40,8 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; @@ -53,7 +57,7 @@ public class VectorFieldDef extends IndexableFieldDef implements VectorQueryable "cosine", VectorSimilarityFunction.COSINE); private static final String HNSW_FORMAT_TYPE = "hnsw"; - private static final int MAX_DOC_VALUE_DIMENSIONS = 2048; + private static final int MAX_VECTOR_DIMENSIONS = 4096; private final int vectorDimensions; private final VectorSimilarityFunction similarityFunction; private final KnnVectorsFormat vectorsFormat; @@ -87,7 +91,29 @@ private static KnnVectorsFormat createVectorsFormat(VectorIndexingOptions vector vectorIndexingOptions.getHnswEfConstruction() > 0 ? vectorIndexingOptions.getHnswEfConstruction() : Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; - return new Lucene99HnswVectorsFormat(m, efConstruction); + Lucene99HnswVectorsFormat lucene99HnswVectorsFormat = + new Lucene99HnswVectorsFormat(m, efConstruction); + return new KnnVectorsFormat(lucene99HnswVectorsFormat.getName()) { + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return lucene99HnswVectorsFormat.fieldsWriter(state); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return lucene99HnswVectorsFormat.fieldsReader(state); + } + + @Override + public int getMaxDimensions(String fieldName) { + return MAX_VECTOR_DIMENSIONS; + } + + @Override + public String toString() { + return lucene99HnswVectorsFormat.toString(); + } + }; } /** @@ -132,19 +158,9 @@ protected void validateRequest(Field requestField) { if (requestField.getVectorDimensions() <= 0) { throw new IllegalArgumentException("Vector dimension should be > 0"); } - if (requestField.getStoreDocValues() - && requestField.getVectorDimensions() > MAX_DOC_VALUE_DIMENSIONS) { - throw new IllegalArgumentException( - "Vector dimension must be <= " + MAX_DOC_VALUE_DIMENSIONS + " for doc values"); - } - - if (requestField.getSearch()) { - if (requestField.getVectorDimensions() > Lucene99HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS) { - throw new IllegalArgumentException( - "Vector dimension must be <= " - + Lucene99HnswVectorsFormat.DEFAULT_MAX_DIMENSIONS - + " for search"); - } + if ((requestField.getStoreDocValues() || requestField.getSearch()) + && requestField.getVectorDimensions() > MAX_VECTOR_DIMENSIONS) { + throw new IllegalArgumentException("Vector dimension must be <= " + MAX_VECTOR_DIMENSIONS); } } diff --git a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java index d797d8ff6..146e87bc0 100644 --- a/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java +++ b/src/test/java/com/yelp/nrtsearch/server/luceneserver/field/VectorFieldDefTest.java @@ -768,14 +768,14 @@ public void testMaxDocValueDimensions() { Field.newBuilder() .setName("vector") .setType(FieldType.VECTOR) - .setVectorDimensions(2049) + .setVectorDimensions(4097) .setStoreDocValues(true) .build(); try { new VectorFieldDef("vector", field); fail(); } catch (IllegalArgumentException e) { - assertEquals("Vector dimension must be <= 2048 for doc values", e.getMessage()); + assertEquals("Vector dimension must be <= 4096", e.getMessage()); } } @@ -785,7 +785,7 @@ public void testMaxSearchDimensions() { Field.newBuilder() .setName("vector") .setType(FieldType.VECTOR) - .setVectorDimensions(1025) + .setVectorDimensions(4097) .setSearch(true) .setVectorSimilarity("cosine") .build(); @@ -793,7 +793,7 @@ public void testMaxSearchDimensions() { new VectorFieldDef("vector", field); fail(); } catch (IllegalArgumentException e) { - assertEquals("Vector dimension must be <= 1024 for search", e.getMessage()); + assertEquals("Vector dimension must be <= 4096", e.getMessage()); } }