From 980ff4f719086cfda0eef02c3482cdd0d7681948 Mon Sep 17 00:00:00 2001 From: Aditi Ahuja Date: Thu, 18 Apr 2024 12:59:18 +0530 Subject: [PATCH] update formula --- section_faiss_vector_index.go | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/section_faiss_vector_index.go b/section_faiss_vector_index.go index 5df7e432..55c7523f 100644 --- a/section_faiss_vector_index.go +++ b/section_faiss_vector_index.go @@ -348,8 +348,10 @@ func (v *vectorIndexOpaque) mergeAndWriteVectorIndexes(sbs []*SegmentBase, nvecs := len(finalVecIDs) - // index type to be created after merge based on the number of vectors in - // indexData added into the index. + indexOptimizedFor := indexes[0].indexOptimizedFor + + // index type to be created after merge based on the number of vectors + // in indexData added into the index and chosen optimization. nlist := determineCentroids(nvecs) indexDescription, indexClass := determineIndexToUse(nvecs, nlist) @@ -437,10 +439,7 @@ func determineCentroids(nvecs int) int { var nlist int switch { - // At 1M vectors, nlist = 4k gave a reasonably high recall with the right nprobe, - // whereas 1M/100 = 10000 centroids would increase training time without - // corresponding increase in recall - case nvecs >= 1000000: + case nvecs >= 200000: nlist = int(4 * math.Sqrt(float64(nvecs))) case nvecs >= 1000: // 100 points per cluster is a reasonable default, considering the default