Skip to content

Commit

Permalink
update formula
Browse files Browse the repository at this point in the history
  • Loading branch information
metonymic-smokey committed Apr 30, 2024
1 parent 8de5651 commit 980ff4f
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions section_faiss_vector_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -348,8 +348,10 @@ func (v *vectorIndexOpaque) mergeAndWriteVectorIndexes(sbs []*SegmentBase,

nvecs := len(finalVecIDs)

- // index type to be created after merge based on the number of vectors in
- // indexData added into the index.
+ indexOptimizedFor := indexes[0].indexOptimizedFor
+
+ // index type to be created after merge based on the number of vectors
+ // in indexData added into the index and chosen optimization.
nlist := determineCentroids(nvecs)
indexDescription, indexClass := determineIndexToUse(nvecs, nlist)

Expand Down Expand Up @@ -437,10 +439,7 @@ func determineCentroids(nvecs int) int {
var nlist int

switch {
- // At 1M vectors, nlist = 4k gave a reasonably high recall with the right nprobe,
- // whereas 1M/100 = 10000 centroids would increase training time without
- // corresponding increase in recall
- case nvecs >= 1000000:
+ case nvecs >= 200000:
nlist = int(4 * math.Sqrt(float64(nvecs)))
case nvecs >= 1000:
// 100 points per cluster is a reasonable default, considering the default
Expand Down

0 comments on commit 980ff4f

Please sign in to comment.