Skip to content

Commit 0c7027f

Browse files
MB-60662: revert doc id cache (#222)
- reverts the cache that stored the _id dictionary and the max docID seen in the segment - with this revert, DocNumbers always creates a fresh dictionary object and traverses the FST to get the max doc ID per segment base
1 parent 4006929 commit 0c7027f

File tree

2 files changed

+10
-53
lines changed

2 files changed

+10
-53
lines changed

dict.go

+1-7
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ package zap
1616

1717
import (
1818
"fmt"
19-
"sync"
2019

2120
"github.com/RoaringBitmap/roaring"
2221
index "github.com/blevesearch/bleve_index_api"
@@ -31,10 +30,6 @@ type Dictionary struct {
3130
fieldID uint16
3231
fst *vellum.FST
3332

34-
// if the dictionary is shared across multiple threads
35-
// we need to protect the fstReader with a mutex
36-
// since it is not thread safe
37-
m sync.Mutex
3833
fstReader *vellum.Reader
3934

4035
bytesRead uint64
@@ -62,9 +57,8 @@ func (d *Dictionary) postingsList(term []byte, except *roaring.Bitmap, rv *Posti
6257
return d.postingsListInit(rv, except), nil
6358
}
6459

65-
d.m.Lock()
6660
postingsOffset, exists, err := d.fstReader.Get(term)
67-
d.m.Unlock()
61+
6862
if err != nil {
6963
return nil, fmt.Errorf("vellum err: %v", err)
7064
}

segment.go

+9-46
Original file line numberDiff line numberDiff line change
@@ -110,12 +110,6 @@ type SegmentBase struct {
110110

111111
m sync.Mutex
112112
fieldFSTs map[uint16]*vellum.FST
113-
114-
docIDMutex sync.RWMutex
115-
// cache the maximum docID seen in this segment
116-
cachedMaxDocID string
117-
//cache the dictionary for the _id field
118-
idDict *Dictionary
119113
}
120114

121115
func (sb *SegmentBase) Size() int {
@@ -587,57 +581,26 @@ func (s *SegmentBase) Count() uint64 {
587581
return s.numDocs
588582
}
589583

590-
func (s *SegmentBase) getDocIDinfo() (*Dictionary, string, error) {
591-
// obtain a read lock to check if the max doc ID and dict for _id is cached
592-
s.docIDMutex.RLock()
593-
cachedDocID := s.cachedMaxDocID
594-
cachedDict := s.idDict
595-
if cachedDocID != "" && cachedDict != nil {
596-
s.docIDMutex.RUnlock()
597-
// max doc ID and the id dict is cached, return it
598-
return cachedDict, cachedDocID, nil
599-
}
600-
s.docIDMutex.RUnlock()
601-
// not cached so obtain a write lock
602-
// to create the _id dict and read the FST
603-
// to get the max doc id and cache them
604-
s.docIDMutex.Lock()
605-
defer s.docIDMutex.Unlock()
606-
// check if the info is cached again
607-
// by some other thread to avoid unnecessary
608-
// ops.
609-
if s.idDict != nil && s.cachedMaxDocID != "" {
610-
return s.idDict, s.cachedMaxDocID, nil
611-
}
612-
// create the _id dict
613-
idDict, err := s.dictionary("_id")
614-
if err != nil {
615-
return nil, "", err
616-
}
617-
s.idDict = idDict
618-
// max doc ID is not cached, get it from the FST
619-
sMax, err := idDict.fst.GetMaxKey()
620-
if err != nil {
621-
return nil, "", err
622-
}
623-
// cache it
624-
s.cachedMaxDocID = string(sMax)
625-
return s.idDict, s.cachedMaxDocID, nil
626-
}
627-
628584
// DocNumbers returns a bitset corresponding to the doc numbers of all the
629585
// provided _id strings
630586
func (s *SegmentBase) DocNumbers(ids []string) (*roaring.Bitmap, error) {
631587
rv := roaring.New()
632588

633589
if len(s.fieldsMap) > 0 {
590+
idDict, err := s.dictionary("_id")
591+
if err != nil {
592+
return nil, err
593+
}
594+
634595
postingsList := emptyPostingsList
635-
idDict, maxDocID, err := s.getDocIDinfo()
596+
597+
sMax, err := idDict.fst.GetMaxKey()
636598
if err != nil {
637599
return nil, err
638600
}
601+
sMaxStr := string(sMax)
639602
for _, id := range ids {
640-
if id <= maxDocID {
603+
if id <= sMaxStr {
641604
postingsList, err = idDict.postingsList([]byte(id), nil, postingsList)
642605
if err != nil {
643606
return nil, err

0 commit comments

Comments
 (0)