Skip to content

Commit

Permalink
MB-62230 - Pre-filtering performance optimisations (#269)
Browse files Browse the repository at this point in the history
1. Add elements to the bitmap in a batch instead of individually to
bring down the latency of pre-filtered queries.
2. Vary the selector used: switch to an exclusion selector when
selectivity is high (more than half of the vectors are eligible).

---------

Co-authored-by: Abhinav Dangeti <abhinav@couchbase.com>
  • Loading branch information
metonymic-smokey and abhinavdangeti authored Oct 18, 2024
1 parent 7bab2f9 commit 5f8f0f0
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 7 deletions.
60 changes: 56 additions & 4 deletions faiss_vector_posting.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,19 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
if len(eligibleDocIDs) > 0 {
// Non-zero documents eligible per the filter query.

// If every element in the index is eligible (e.g. high selectivity
// cases), then this can basically be considered unfiltered kNN.
if len(eligibleDocIDs) == int(sb.numDocs) {
scores, ids, err := vecIndex.SearchWithoutIDs(qVector, k,
vectorIDsToExclude, params)
if err != nil {
return nil, err
}

addIDsToPostingsList(rv, ids, scores)
return rv, nil
}

// vector IDs corresponding to the local doc numbers to be
// considered for the search
vectorIDsToInclude := make([]int64, 0, len(eligibleDocIDs))
Expand Down Expand Up @@ -419,12 +432,51 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool

// Getting the vector IDs corresponding to the eligible
// doc IDs.
// The docVecIDMap maps each docID to vectorIDs corresponding
// to it.
// Usually, each docID has one vecID mapped to it unless
// the vector is nested, in which case there can be multiple
// vectorIDs mapped to the same docID.
// E.g. docID d1 -> vecID v1, for the first case;
// d1 -> {v1,v2}, for the second case.
eligibleVecIDsBitmap := roaring.NewBitmap()
vecIDsUint32 := make([]uint32, 0)
for _, eligibleDocID := range eligibleDocIDs {
vecIDs := docVecIDMap[uint32(eligibleDocID)]
for _, vecID := range vecIDs {
eligibleVecIDsBitmap.Add(uint32(vecID))
vecIDsUint32 = append(vecIDsUint32, uint32(vecID))
}
}
eligibleVecIDsBitmap.AddMany(vecIDsUint32)

var selector faiss.Selector
var err error
// If there are more elements to be included than excluded, it
// might be quicker to use an exclusion selector as a filter
// instead of an inclusion selector.
if float32(eligibleVecIDsBitmap.GetCardinality())/
float32(len(vecDocIDMap)) > 0.5 {
ineligibleVectorIDs := make([]int64, 0, len(vecDocIDMap)-
len(vectorIDsToInclude))
for docID, vecIDs := range docVecIDMap {
for _, vecID := range vecIDs {
if !eligibleVecIDsBitmap.Contains(uint32(vecID)) {
if except != nil && !except.Contains(docID) {
ineligibleVectorIDs = append(ineligibleVectorIDs,
int64(vecID))
} else {
ineligibleVectorIDs = append(ineligibleVectorIDs,
int64(vecID))
}
}
}
}
selector, err = faiss.NewIDSelectorNot(ineligibleVectorIDs)
} else {
selector, err = faiss.NewIDSelectorBatch(vectorIDsToInclude)
}
if err != nil {
return nil, err
}

// Determining which clusters, identified by centroid ID,
Expand All @@ -433,7 +485,7 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
eligibleCentroidIDs := make([]int64, 0)
for centroidID, vecIDs := range centroidVecIDMap {
vecIDs.And(eligibleVecIDsBitmap)
if vecIDs.GetCardinality() > 0 {
if !vecIDs.IsEmpty() {
// The mapping is now reduced to those vectors which
// are also eligible docs for the filter query.
centroidVecIDMap[centroidID] = vecIDs
Expand Down Expand Up @@ -470,8 +522,8 @@ func (sb *SegmentBase) InterpretVectorIndex(field string, requiresFiltering bool
// Search the clusters specified by 'closestCentroidIDs' for
// vectors whose IDs are present in 'vectorIDsToInclude'
scores, ids, err := vecIndex.SearchClustersFromIVFIndex(
vectorIDsToInclude, closestCentroidIDs, minEligibleCentroids,
k, qVector, centroidDistances, params)
selector, len(vectorIDsToInclude), closestCentroidIDs,
minEligibleCentroids, k, qVector, centroidDistances, params)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ go 1.21
require (
github.com/RoaringBitmap/roaring v1.9.3
github.com/blevesearch/bleve_index_api v1.1.12
github.com/blevesearch/go-faiss v1.0.22
github.com/blevesearch/go-faiss v1.0.23
github.com/blevesearch/mmap-go v1.0.4
github.com/blevesearch/scorch_segment_api/v2 v2.2.16
github.com/blevesearch/vellum v1.0.10
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ github.com/bits-and-blooms/bitset v1.12.0 h1:U/q1fAF7xXRhFCrhROzIfffYnu+dlS38vCZ
github.com/bits-and-blooms/bitset v1.12.0/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8=
github.com/blevesearch/bleve_index_api v1.1.12 h1:P4bw9/G/5rulOF7SJ9l4FsDoo7UFJ+5kexNy1RXfegY=
github.com/blevesearch/bleve_index_api v1.1.12/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/go-faiss v1.0.22 h1:j6jwgCOy2a2EQUTOYxjBA59rMn5KPA0jbfYyHNgc2Ls=
github.com/blevesearch/go-faiss v1.0.22/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/go-faiss v1.0.23 h1:Wmc5AFwDLKGl2L6mjLX1Da3vCL0EKa2uHHSorcIS1Uc=
github.com/blevesearch/go-faiss v1.0.23/go.mod h1:OMGQwOaRRYxrmeNdMrXJPvVx8gBnvE5RYrr0BahNnkk=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.2.16 h1:uGvKVvG7zvSxCwcm4/ehBa9cCEuZVE+/zvrSl57QUVY=
Expand Down

0 comments on commit 5f8f0f0

Please sign in to comment.