From fd60128b92057dd8fb4cdaec3f4013e3f561cd1d Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 18 May 2022 15:17:01 +0530 Subject: [PATCH 01/16] bytes read while querying intial --- go.mod | 1 + go.sum | 2 ++ index/scorch/snapshot_index.go | 52 +++++++++++++++++++++------------- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/go.mod b/go.mod index 2d0be65e9..def97174c 100644 --- a/go.mod +++ b/go.mod @@ -26,5 +26,6 @@ require ( github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v0.0.5 go.etcd.io/bbolt v1.3.5 + golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 // indirect golang.org/x/text v0.3.7 ) diff --git a/go.sum b/go.sum index 9623b9398..fb994c2d6 100644 --- a/go.sum +++ b/go.sum @@ -114,6 +114,8 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 h1:nonptSpoQ4vQjyraW20DXPAglgQfVnM9ZC6MmNLMR60= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index a7ff7cf13..4f3adc52d 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -18,6 +18,7 @@ import ( "container/heap" "encoding/binary" "fmt" + "log" "os" "path/filepath" "reflect" @@ -28,7 +29,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/v2/document" index "github.com/blevesearch/bleve_index_api" - segment "github.com/blevesearch/scorch_segment_api/v2" + segmentl "github.com/blevesearch/scorch_segment_api/v2" "github.com/blevesearch/vellum" lev "github.com/blevesearch/vellum/levenshtein" bolt "go.etcd.io/bbolt" @@ -38,13 +39,13 @@ import ( var lb1, lb2 *lev.LevenshteinAutomatonBuilder type asynchSegmentResult struct { - dict segment.TermDictionary - dictItr segment.DictionaryIterator + dict segmentl.TermDictionary + dictItr segmentl.DictionaryIterator index int docs *roaring.Bitmap - postings segment.PostingsList + postings segmentl.PostingsList err error } @@ -140,7 +141,7 @@ func (i *IndexSnapshot) updateSize() { } func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, - makeItr func(i segment.TermDictionary) segment.DictionaryIterator, + makeItr func(i segmentl.TermDictionary) segmentl.DictionaryIterator, randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) @@ -201,7 +202,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, } func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { return i.AutomatonIterator(nil, nil, nil) }, false) } @@ -229,7 +230,7 @@ func calculateExclusiveEndFromInclusiveEnd(inclusiveEnd []byte) []byte { func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { endTermExclusive := calculateExclusiveEndFromInclusiveEnd(endTerm) return i.AutomatonIterator(nil, startTerm, endTermExclusive) }, false) @@ -256,7 +257,7 @@ func calculateExclusiveEndFromPrefix(in []byte) []byte { func (i *IndexSnapshot) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix) - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { return i.AutomatonIterator(nil, termPrefix, termPrefixEnd) }, false) } @@ -271,7 +272,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, return nil, err } - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) } @@ -299,7 +300,7 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg) } - return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) } @@ -507,16 +508,17 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { + log.Printf("inside term field reader") rv := i.allocTermFieldReaderDicts(field) rv.term = term rv.field = field rv.snapshot = i if rv.postings == nil { - rv.postings = make([]segment.PostingsList, len(i.segment)) + rv.postings = make([]segmentl.PostingsList, len(i.segment)) } if rv.iterators == nil { - rv.iterators = make([]segment.PostingsIterator, len(i.segment)) + rv.iterators = make([]segmentl.PostingsIterator, len(i.segment)) } rv.segmentOffset = 0 rv.includeFreq = includeFreq @@ -524,16 +526,20 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.includeTermVectors = includeTermVectors rv.currPosting = nil rv.currID = rv.currID[:0] - + var bytesRead uint64 if rv.dicts == nil { - rv.dicts = make([]segment.TermDictionary, len(i.segment)) + rv.dicts = make([]segmentl.TermDictionary, len(i.segment)) for i, segment := range i.segment { + if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + bytesRead += segP.BytesRead() + } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } rv.dicts[i] = dict } + } for i, segment := range i.segment { @@ -543,6 +549,12 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } rv.postings[i] = pl rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) + if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { + bytesRead += itr.BytesRead() + log.Printf("bytes read of disk for this query %v\n", bytesRead) + } + } } atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) return rv, nil @@ -624,13 +636,13 @@ func docInternalToNumber(in index.IndexInternalID) (uint64, error) { func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( segmentIndex int, localDocNum uint64, fields []string, cFields []string, - visitor index.DocValueVisitor, dvs segment.DocVisitState) ( - cFieldsOut []string, dvsOut segment.DocVisitState, err error) { + visitor index.DocValueVisitor, dvs segmentl.DocVisitState) ( + cFieldsOut []string, dvsOut segmentl.DocVisitState, err error) { ss := i.segment[segmentIndex] var vFields []string // fields that are visitable via the segment - ssv, ssvOk := ss.segment.(segment.DocValueVisitable) + ssv, ssvOk := ss.segment.(segmentl.DocValueVisitable) if ssvOk && ssv != nil { vFields, err = ssv.VisitableDocValueFields() if err != nil { @@ -689,7 +701,7 @@ func (i *IndexSnapshot) DocValueReader(fields []string) ( type DocValueReader struct { i *IndexSnapshot fields []string - dvs segment.DocVisitState + dvs segmentl.DocVisitState currSegmentIndex int currCachedFields []string @@ -745,7 +757,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} { func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} { rv := make(map[string]struct{}, len(i.segment)) for _, segmentSnapshot := range i.segment { - if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { + if seg, ok := segmentSnapshot.segment.(segmentl.PersistedSegment); ok { rv[seg.Path()] = struct{}{} } } @@ -757,7 +769,7 @@ func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} { func (i *IndexSnapshot) reClaimableDocsRatio() float64 { var totalCount, liveCount uint64 for _, segmentSnapshot := range i.segment { - if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { + if _, ok := segmentSnapshot.segment.(segmentl.PersistedSegment); ok { totalCount += uint64(segmentSnapshot.FullSize()) liveCount += uint64(segmentSnapshot.Count()) } From 6d99c50cb7b9218b7bf550c5ee44daf6875b7044 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Thu, 19 May 2022 16:00:29 +0530 Subject: [PATCH 02/16] adding the postings RBM bytes size to bytesRead --- index/scorch/snapshot_index.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4f3adc52d..5b237fcd7 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -539,7 +539,6 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } rv.dicts[i] = dict } - } for i, segment := range i.segment { @@ -549,11 +548,16 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } rv.postings[i] = pl rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) + if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if postings, ok := pl.(segmentl.BytesOffDiskStats); ok { + bytesRead += postings.BytesRead() + } + if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { bytesRead += itr.BytesRead() - log.Printf("bytes read of disk for this query %v\n", bytesRead) } + log.Printf("bytes read of disk for this query %v\n", bytesRead) } } atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) From 40e12f9990eae6c922faf0522b8524abecf51ca0 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Tue, 24 May 2022 11:45:43 +0530 Subject: [PATCH 03/16] making bytesRead part of scorch stats --- index/scorch/scorch.go | 1 + index/scorch/snapshot_index.go | 26 ++++++++++++-------------- index/scorch/stats.go | 1 + 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 822093729..2ba852e91 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -582,6 +582,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["index_time"] = m["TotIndexTime"] m["term_searchers_started"] = m["TotTermSearchersStarted"] m["term_searchers_finished"] = m["TotTermSearchersFinished"] + m["num_bytes_read_query_time"] = m["TotBytesReadQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 5b237fcd7..d6a5f7a00 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -506,19 +506,19 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err return next.ID, nil } -func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, +func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { log.Printf("inside term field reader") - rv := i.allocTermFieldReaderDicts(field) + rv := is.allocTermFieldReaderDicts(field) rv.term = term rv.field = field - rv.snapshot = i + rv.snapshot = is if rv.postings == nil { - rv.postings = make([]segmentl.PostingsList, len(i.segment)) + rv.postings = make([]segmentl.PostingsList, len(is.segment)) } if rv.iterators == nil { - rv.iterators = make([]segmentl.PostingsIterator, len(i.segment)) + rv.iterators = make([]segmentl.PostingsIterator, len(is.segment)) } rv.segmentOffset = 0 rv.includeFreq = includeFreq @@ -526,12 +526,11 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.includeTermVectors = includeTermVectors rv.currPosting = nil rv.currID = rv.currID[:0] - var bytesRead uint64 if rv.dicts == nil { - rv.dicts = make([]segmentl.TermDictionary, len(i.segment)) - for i, segment := range i.segment { + rv.dicts = make([]segmentl.TermDictionary, len(is.segment)) + for i, segment := range is.segment { if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - bytesRead += segP.BytesRead() + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()) } dict, err := segment.segment.Dictionary(field) if err != nil { @@ -541,7 +540,7 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } } - for i, segment := range i.segment { + for i, segment := range is.segment { pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) if err != nil { return nil, err @@ -551,16 +550,15 @@ func (i *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { if postings, ok := pl.(segmentl.BytesOffDiskStats); ok { - bytesRead += postings.BytesRead() + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, postings.BytesRead()) } if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { - bytesRead += itr.BytesRead() + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, itr.BytesRead()) } - log.Printf("bytes read of disk for this query %v\n", bytesRead) } } - atomic.AddUint64(&i.parent.stats.TotTermSearchersStarted, uint64(1)) + atomic.AddUint64(&is.parent.stats.TotTermSearchersStarted, uint64(1)) return rv, nil } diff --git a/index/scorch/stats.go b/index/scorch/stats.go index 626fff2e4..a1db86920 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -42,6 +42,7 @@ type Stats struct { TotAnalysisTime uint64 TotIndexTime uint64 + TotBytesReadQueryTime uint64 TotIndexedPlainTextBytes uint64 TotTermSearchersStarted uint64 From bb8fd0f4d0595471403d14387f1da3375d26a1b0 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Tue, 24 May 2022 13:58:05 +0530 Subject: [PATCH 04/16] resetting bytes read stat for every search --- index/scorch/scorch.go | 8 ++++++++ index/scorch/snapshot_index.go | 2 -- index_impl.go | 9 +++++++++ 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2ba852e91..2a5d9ad7e 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -525,6 +525,14 @@ func (s *Scorch) Stats() json.Marshaler { return &s.stats } +func (s *Scorch) BytesRead() uint64 { + return s.stats.TotBytesReadQueryTime +} + +func (s *Scorch) ResetBytesRead() { + atomic.StoreUint64(&s.stats.TotBytesReadQueryTime, uint64(0)) +} + func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, uint64, uint64) { var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64 diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index d6a5f7a00..fec5fb087 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -18,7 +18,6 @@ import ( "container/heap" "encoding/binary" "fmt" - "log" "os" "path/filepath" "reflect" @@ -508,7 +507,6 @@ func (i *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err err func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) { - log.Printf("inside term field reader") rv := is.allocTermFieldReaderDicts(field) rv.term = term diff --git a/index_impl.go b/index_impl.go index 8a9cfd3b4..d01407918 100644 --- a/index_impl.go +++ b/index_impl.go @@ -26,6 +26,7 @@ import ( "time" "github.com/blevesearch/bleve/v2/document" + "github.com/blevesearch/bleve/v2/index/scorch" "github.com/blevesearch/bleve/v2/index/upsidedown" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/registry" @@ -468,6 +469,14 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } }() + // TODO: need a better way to reset the bytes read + // exposes index type being scorch over here, is + // this fine? + if i.meta.IndexType == scorch.Name { + is, _ := i.i.(*scorch.Scorch) + is.ResetBytesRead() + } + searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, From 00d9e5442223fe55b173962e7d071a8d807d3d13 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Mon, 30 May 2022 15:04:48 +0530 Subject: [PATCH 05/16] fixed the rcu logic wrt merger and introduced a new wcu stat --- index/scorch/merge.go | 24 ++++++++++++++++++++++++ index/scorch/scorch.go | 15 +++++++++++++-- index/scorch/stats.go | 1 + index_impl.go | 9 --------- 4 files changed, 38 insertions(+), 11 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 422527c6f..880749888 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -327,6 +327,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, fileMergeZapStartTime := time.Now() atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1) + prevBytesReadTotal := cumulateBytesRead(segmentsToMerge) newDocNums, _, err := s.segPlugin.Merge(segmentsToMerge, docsToDrop, path, cw.cancelCh, s) atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1) @@ -352,6 +353,13 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1) return err } + + switch segI := seg.(type) { + case segment.BytesOffDiskStats: + segI.SetBytesRead(prevBytesReadTotal) + seg = segI.(segment.Segment) + } + oldNewDocNums = make(map[uint64][]uint64) for i, segNewDocNums := range newDocNums { oldNewDocNums[task.Segments[i].Id()] = segNewDocNums @@ -426,6 +434,16 @@ type segmentMerge struct { notifyCh chan *mergeTaskIntroStatus } +func cumulateBytesRead(sbs []segment.Segment) uint64 { + rv := uint64(0) + for _, seg := range sbs { + if segI, ok := seg.(segment.BytesOffDiskStats); ok { + rv += segI.BytesRead() + } + } + return rv +} + // perform a merging of the given SegmentBase instances into a new, // persisted segment, and synchronously introduce that new segment // into the root @@ -442,6 +460,7 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, filename := zapFileName(newSegmentID) path := s.path + string(os.PathSeparator) + filename + prevBytesReadTotal := cumulateBytesRead(sbs) newDocNums, _, err := s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s) @@ -463,6 +482,11 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } + switch segI := seg.(type) { + case segment.BytesOffDiskStats: + segI.SetBytesRead(prevBytesReadTotal) + seg = segI.(segment.Segment) + } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 2a5d9ad7e..e4e8e7974 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -387,6 +387,12 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { analysisResults := make([]index.Document, int(numUpdates)) var itemsDeQueued uint64 var totalAnalysisSize int + analysisBytes := func(tokMap index.TokenFrequencies) (rv uint64) { + for k := range tokMap { + rv += uint64(len(k)) + } + return rv + } for itemsDeQueued < numUpdates { result := <-resultChan resultSize := result.Size() @@ -394,6 +400,10 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { totalAnalysisSize += resultSize analysisResults[itemsDeQueued] = result itemsDeQueued++ + result.VisitFields(func(f index.Field) { + atomic.AddUint64(&s.stats.TotIndexedAnalysisBytes, + uint64(analysisBytes(f.AnalyzedTokenFrequencies()))) + }) } close(resultChan) defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize)) @@ -529,8 +539,8 @@ func (s *Scorch) BytesRead() uint64 { return s.stats.TotBytesReadQueryTime } -func (s *Scorch) ResetBytesRead() { - atomic.StoreUint64(&s.stats.TotBytesReadQueryTime, uint64(0)) +func (s *Scorch) SetBytesRead(val uint64) { + atomic.StoreUint64(&s.stats.TotBytesReadQueryTime, val) } func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, @@ -592,6 +602,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["term_searchers_finished"] = m["TotTermSearchersFinished"] m["num_bytes_read_query_time"] = m["TotBytesReadQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] + m["num_analysis_bytes_indexed"] = m["TotIndexedAnalysisBytes"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] m["num_recs_to_persist"] = m["TotItemsToPersist"] diff --git a/index/scorch/stats.go b/index/scorch/stats.go index a1db86920..f15740cbb 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -44,6 +44,7 @@ type Stats struct { TotBytesReadQueryTime uint64 TotIndexedPlainTextBytes uint64 + TotIndexedAnalysisBytes uint64 TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 diff --git a/index_impl.go b/index_impl.go index d01407918..8a9cfd3b4 100644 --- a/index_impl.go +++ b/index_impl.go @@ -26,7 +26,6 @@ import ( "time" "github.com/blevesearch/bleve/v2/document" - "github.com/blevesearch/bleve/v2/index/scorch" "github.com/blevesearch/bleve/v2/index/upsidedown" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/registry" @@ -469,14 +468,6 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr } }() - // TODO: need a better way to reset the bytes read - // exposes index type being scorch over here, is - // this fine? - if i.meta.IndexType == scorch.Name { - is, _ := i.i.(*scorch.Scorch) - is.ResetBytesRead() - } - searcher, err := req.Query.Searcher(indexReader, i.m, search.SearcherOptions{ Explain: req.Explain, IncludeTermVectors: req.IncludeLocations || req.Highlight != nil, From 6a6bca4f977c58b0ad12eef3989f000400890e52 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Tue, 31 May 2022 19:26:01 +0530 Subject: [PATCH 06/16] bug fix: fixed total bytes read value after merge --- index/scorch/merge.go | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 880749888..259978e5d 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -356,7 +356,8 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, switch segI := seg.(type) { case segment.BytesOffDiskStats: - segI.SetBytesRead(prevBytesReadTotal) + totalBytesRead := segI.BytesRead() + prevBytesReadTotal + segI.SetBytesRead(totalBytesRead) seg = segI.(segment.Segment) } @@ -460,7 +461,6 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, filename := zapFileName(newSegmentID) path := s.path + string(os.PathSeparator) + filename - prevBytesReadTotal := cumulateBytesRead(sbs) newDocNums, _, err := s.segPlugin.Merge(sbs, sbsDrops, path, s.closeCh, s) @@ -482,11 +482,6 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot, atomic.AddUint64(&s.stats.TotMemMergeErr, 1) return nil, 0, err } - switch segI := seg.(type) { - case segment.BytesOffDiskStats: - segI.SetBytesRead(prevBytesReadTotal) - seg = segI.(segment.Segment) - } // update persisted stats atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count()) From 3d77ee39363de78c29e6099714c22c54a86266bf Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Thu, 9 Jun 2022 18:20:19 +0530 Subject: [PATCH 07/16] updated bytes read with stored fields and iterators' count --- index/scorch/scorch.go | 2 +- index/scorch/snapshot_index.go | 15 +++++++++++---- index/scorch/snapshot_index_tfr.go | 13 +++++++++++++ 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index e4e8e7974..f4c897c6d 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -402,7 +402,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { itemsDeQueued++ result.VisitFields(func(f index.Field) { atomic.AddUint64(&s.stats.TotIndexedAnalysisBytes, - uint64(analysisBytes(f.AnalyzedTokenFrequencies()))) + analysisBytes(f.AnalyzedTokenFrequencies())) }) } close(resultChan) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index fec5fb087..0930234e5 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -424,6 +424,10 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { segmentIndex, localDocNum := i.segmentIndexAndLocalDocNumFromGlobal(docNum) rvd := document.NewDocument(id) + var prevBytesRead uint64 + if seg, ok := i.segment[segmentIndex].segment.(segmentl.BytesOffDiskStats); ok { + prevBytesRead = seg.BytesRead() + } err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { if name == "_id" { return true @@ -453,7 +457,10 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { if err != nil { return nil, err } - + if seg, ok := i.segment[segmentIndex].segment.(segmentl.BytesOffDiskStats); ok { + delta := seg.BytesRead() - prevBytesRead + atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, delta) + } return rvd, nil } @@ -527,13 +534,13 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if rv.dicts == nil { rv.dicts = make([]segmentl.TermDictionary, len(is.segment)) for i, segment := range is.segment { - if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()) - } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } + if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()) + } rv.dicts[i] = dict } } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index e983e3dd2..d66bafd75 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -76,6 +76,10 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in } // find the next hit for i.segmentOffset < len(i.iterators) { + prevBytesRead := uint64(0) + if itr, ok := i.iterators[i.segmentOffset].(segment.BytesOffDiskStats); ok { + prevBytesRead = itr.BytesRead() + } next, err := i.iterators[i.segmentOffset].Next() if err != nil { return nil, err @@ -89,6 +93,15 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in i.currID = rv.ID i.currPosting = next + // postingsIterators is maintain the bytesRead stat in a cumulative fashion. + // this is because there are chances of having a series of loadChunk calls, + // and they have to be added together before sending the bytesRead at this point + // upstream. + if itr, ok := i.iterators[i.segmentOffset].(segment.BytesOffDiskStats); ok { + delta := itr.BytesRead() - prevBytesRead + atomic.AddUint64(&i.snapshot.parent.stats.TotBytesReadQueryTime, uint64(delta)) + } + return rv, nil } i.segmentOffset++ From a95095590665b98805213a16714e7275a98d8b84 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 15 Jun 2022 12:12:46 +0530 Subject: [PATCH 08/16] accounting bytes read for loading term dictionaries in newIndexSnapshotFieldDict --- index/scorch/snapshot_index.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 0930234e5..4e6bb557f 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -150,6 +150,10 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, if err != nil { results <- &asynchSegmentResult{err: err} } else { + if seg, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, + seg.BytesRead()) + } if randomLookup { results <- &asynchSegmentResult{dict: dict} } else { From 230159d06718394d97ee3ddb7da9cf54d469039f Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Tue, 21 Jun 2022 17:05:10 +0530 Subject: [PATCH 09/16] unit tests and bug fixes --- index/scorch/snapshot_index.go | 40 ++++++-- index_test.go | 164 +++++++++++++++++++++++++++++++++ sample.txt | 10 ++ 3 files changed, 208 insertions(+), 6 deletions(-) create mode 100644 sample.txt diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 4e6bb557f..3d95106c9 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -146,13 +146,17 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { + var prevBytesRead uint64 + if seg, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + prevBytesRead = seg.BytesRead() + } dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { if seg, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, - seg.BytesRead()) + seg.BytesRead()-prevBytesRead) } if randomLookup { results <- &asynchSegmentResult{dict: dict} @@ -463,6 +467,7 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { } if seg, ok := i.segment[segmentIndex].segment.(segmentl.BytesOffDiskStats); ok { delta := seg.BytesRead() - prevBytesRead + // log.Printf("stored field section %v\n", delta) atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, delta) } return rvd, nil @@ -538,32 +543,48 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if rv.dicts == nil { rv.dicts = make([]segmentl.TermDictionary, len(is.segment)) for i, segment := range is.segment { + var prevBytesRead uint64 + if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + prevBytesRead = segP.BytesRead() + } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()) + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()-prevBytesRead) } rv.dicts[i] = dict } } for i, segment := range is.segment { + var prevBytesReadPL uint64 + if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if postings, ok := rv.postings[i].(segmentl.BytesOffDiskStats); ok { + prevBytesReadPL = postings.BytesRead() + } + } pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) if err != nil { return nil, err } rv.postings[i] = pl + var prevBytesReadItr uint64 + if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { + prevBytesReadItr = itr.BytesRead() + } + } rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - if postings, ok := pl.(segmentl.BytesOffDiskStats); ok { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, postings.BytesRead()) + if postings, ok := pl.(segmentl.BytesOffDiskStats); ok && prevBytesReadPL < postings.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, postings.BytesRead()-prevBytesReadPL) } - if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, itr.BytesRead()) + if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok && prevBytesReadItr < itr.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, itr.BytesRead()-prevBytesReadItr) } } } @@ -684,10 +705,17 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( } if ssvOk && ssv != nil && len(vFields) > 0 { + var prevBytesRead uint64 + if ssvp, ok := ssv.(segmentl.BytesOffDiskStats); ok { + prevBytesRead = ssvp.BytesRead() + } dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } + if ssvp, ok := ssv.(segmentl.BytesOffDiskStats); ok { + atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, ssvp.BytesRead()-prevBytesRead) + } } if errCh != nil { diff --git a/index_test.go b/index_test.go index c0af3689b..ad5c52711 100644 --- a/index_test.go +++ b/index_test.go @@ -15,6 +15,7 @@ package bleve import ( + "bufio" "context" "encoding/json" "fmt" @@ -229,6 +230,169 @@ func TestCrud(t *testing.T) { } } +func TestBytesRead(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMapping := NewIndexMapping() + indexMapping.TypeField = "type" + indexMapping.DefaultAnalyzer = "en" + documentMapping := NewDocumentMapping() + indexMapping.AddDocumentMapping("hotel", documentMapping) + indexMapping.StoreDynamic = false + FieldMapping := NewTextFieldMapping() + FieldMapping.Store = false + documentMapping.AddFieldMappingsAt("reviews.content", FieldMapping) + FieldMapping = NewTextFieldMapping() + FieldMapping.Store = true + documentMapping.AddFieldMappingsAt("type", FieldMapping) + idx, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil) + if err != nil { + t.Fatal(err) + } + + defer func() { + err := idx.Close() + if err != nil { + t.Fatal(err) + } + }() + + file, err := os.Open("sample.txt") + scanner := bufio.NewScanner(file) + batch := idx.NewBatch() + + type docStructure map[string]interface{} + + for scanner.Scan() { + var doc docStructure + docContent := (scanner.Text()) + json.Unmarshal([]byte(docContent), &doc) + err = batch.Index(fmt.Sprintf("%d", doc["id"]), doc) + if err != nil { + t.Fatalf("failed to create batch %v\n", err) + } + } + + err = idx.Batch(batch) + if err != nil { + t.Fatalf("failed to index batch %v\n", err) + } + query := NewQueryStringQuery("united") + searchRequest := NewSearchRequestOptions(query, int(10), 0, true) + + res, err := idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + stats, _ := idx.StatsMap()["index"].(map[string]interface{}) + prevBytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", prevBytesRead) + + // checking for reusability + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + fmt.Printf("res hits %v\n", len(res.Hits)) + + fuzz := NewFuzzyQuery("unitd") + fuzz.Fuzziness = 2 + searchRequest = NewSearchRequest(fuzz) + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + fmt.Printf("res hits %v\n", len(res.Hits)) + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v %v\n", stats["num_bytes_used_disk_by_root"], bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + typeFacet := NewFacetRequest("type", 2) + query = NewQueryStringQuery("united") + searchRequest = NewSearchRequestOptions(query, int(0), 0, true) + searchRequest.AddFacet("types", typeFacet) + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + min := float64(8000) + max := float64(8010) + numRangeQuery := NewNumericRangeQuery(&min, &max) + numRangeQuery.FieldVal = "id" + searchRequest = NewSearchRequestOptions(numRangeQuery, int(10), 0, true) + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + searchRequest = NewSearchRequestOptions(query, int(10), 0, true) + searchRequest.IncludeLocations = true + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + searchRequest = NewSearchRequestOptions(query, int(10), 0, true) + searchRequest.Fields = []string{"type"} + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + searchRequest = NewSearchRequestOptions(query, int(10), 0, true) + searchRequest.Fields = []string{"type"} + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + disQuery := NewDisjunctionQuery(NewMatchQuery("hotel"), NewMatchQuery("united")) + searchRequest = NewSearchRequestOptions(disQuery, int(10), 0, true) + res, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + fmt.Printf("bytes read dis %v\n", bytesRead-prevBytesRead) + prevBytesRead = bytesRead + + t.Errorf("erorr") +} + func TestIndexCreateNewOverExisting(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) defer cleanupTmpIndexPath(t, tmpIndexPath) diff --git a/sample.txt b/sample.txt new file mode 100644 index 000000000..bede49972 --- /dev/null +++ b/sample.txt @@ -0,0 +1,10 @@ +{"title":"Edinburgh/Leith","name":"Ocean Apartments","address":"2 Western Harbour Midway","directions":null,"phone":"+44 131 553 7394","tollfree":null,"email":null,"fax":null,"url":"http://www.oceanservicedapts.com","checkin":"15H00","checkout":"10H00","price":"from £70","geo":{"lat":55.9812,"lon":-3.2248,"accuracy":"RANGE_INTERPOLATED"},"type":"hotel","id":8576,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"In my personal opinion, this hotel is one of the many hidden gems of Istanbul. Located in an area not very easy to get if you are driving yourself. I would advise taking a taxi to get there. The service from the first \"hello\" until the last \"bye\" was impecable. The terrace restaurant wiew is second to none. The food was excellent. The staff was very nice. In short, next time I am going to Isnbul, I dont believe I will stay anywehere else. Highly recomended.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Lindsey Wiegand III","date":"2013-01-01 16:30:13 +0300"},{"content":"When you first arrive at TomTom Suites you might wonder where you are coming to as the area looks a little run down! But it is in a great location in a street with no passing traffic. Its so quiet. Access to the main sites is easy. Downhill, the tram is within a 5 minute walk and uphill, Iskatel Cadesi ( a buzzing main street with loads of shops and restaurants) and a 2 minute walk round the corner leads to some narrow streets full of atmosphere and restaurants This boutique hotel itself is a sea of tranquility with beautifully appointed rooms, a great breakfast and very helpful staff. Our only regret was that the Terrace bar wasn't open but as the weather was unseasonably inclement it wasn't a problem. We had a great time and would definitely recommend the TomTom Suites","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":4.0},"author":"Bulah Weissnat","date":"2013-06-01 17:06:53 +0300"},{"content":"Staying at Tomtom was the best possible choice we could have made, everything was simply perfect: the location, the facilities, the staff, the quality of the food, the elegance and modern style of the furniture, the superb view of the terrace where we had breakfast as well as a wonderful dinner and a lovely evening. Many details made us feel at home, such as the Ipod dock in the rooms and the Ipads we could use free of charge during breakfast. Most of all, the friendliness of the staff, all of them extremely helpful. I'd highlight Chiara, who gave us many tips, especially telling us to go to Bagdah st., on the Asian side, a place great place that no books mentioned; and Ali, a wonderful guy, much more than a concierge, and a perfect host who did everything he could to make our stay as good as it gets. the proximity to Istklal street, and the tram make the location convenient for both day and night. Considering everything, including the fair rates cherged, I doubt there's a better choice in town.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Ottis Pacocha","date":"2012-08-07 08:16:49 +0300"},{"content":"A really wonderful hotel in a superb location on a traffic free road. It was so peaceful it was easy to forget that we were in the middle of one of the world's busiest cities. The staff were deligthful and couldn't do enough to help. The hotel is in a converted monastary which has been sympathetically refurbished, the artwork depicting istanbul brings a touch of colour - especially in the lift shaft. Our room was enormous and well appointed with a luxurious large bathroom - and yes, a jacuzzi bath. The rooftop restaurant and bar was a perfect way to relax at the end of the day and must have some of the best views in Istanbul over the Bosphorous and the Golden Horn. The food was excellent, modern turkish using french cooking techniques and local produce. Overall I can't wait to go back and I can't recommend the place highly enough.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Dr. Amira Murazik","date":"2013-10-14 03:30:16 +0300"},{"content":"My wife, mother and I really enjoyed our recent stay at TomTom. We always prefer smaller boutique places that are well designed and in actual neighborhoods vs. larger impersonal luxury hotels. TomTom didn't dissapoint! As previously mentioned, beautifully designed and furnished rooms, the best toiletries, etc. The roof deck is very nice with a great view. The location is amazing, in-between everywhere that you want to go but far enough away from the touristy madness. I'd argue it's one of the best situated hotels in Istanbul. As mentioned, it's a very steep but quick walk up to the main street of Isklal. However, this can be challenging if you have any mobility issues. In general, the area is rather hilly but so is most of Istanbul. In the other direction, you can walk up into a neighborhood renowned for antiques and nice cafes or down to the tram that takes you to the main tourist attractions or the road to get to Ortokoy or Bebek. At no point does it feel like you are in the \"tourist district\" but you never feel unwelcome or out of place. As for noise, we found the building itself to be quiet and well insulated. You didn't hear noise from the rooms around you and minimal noise from the room above. Nowhere near as bad as what an earlier reviewer described. The only real noise to speak of does come from the kids in the morning and the afternoon who walk on the dead-end road to and from school. Other than this, and some kids playing soccer on occasion, the street outside of the hotel is pretty quiet Breakfast in the morning is great. Freshly baked pastries (done at the hotel) and fruit, cheeses and meats, served by extremely friendly staff. Fuat in particular is a delight to interact with and was extremely helpful. The staff overall is very friendly and accommodating. Everyone at the front desk regardless of time of day did whatever they could to make sure we were receiving the best service possible. There are however a few things that need improvement to make the hotel even better. -While breakfast is excellent, we were less than impressed with dinner. To the point that we ate there once and did not choose to repeat the experience. The service was good but the quality of the food and the cooking was pretty bad esp. considering the prices of the food vs. what can be had in the neighborhood for much less. We are admitted foodies, but we didn't hold the hotel restaurant up to lofty expectations. This needs to be addressed. -While the staff is very friendly and eager to help, the hotel would benefit from a dedicated concierge or someone who really knows about the various restaurants around the city. With a hotel of this caliber, we expected them to be more informed about different places and to let us know how traffic can affect getting there, etc. If you didn't ask them these kinds of questions, it didn't occur to them to tell you. -Lastly, with the windows closed in the room there is zero airflow without having the heat on and it's extremely dry. I understand them not wanting to run the AC unless the temperature warrants, but there should be a way to circulate air without opening the windows in the spring (see my earlier comment about noise from school kids in the morning). In conclusion, we wouldn't hesitate to recommend TomTom to friends and hope to stay there again the next time we are in Istanbul.","ratings":{"Service":4.0,"Cleanliness":5.0,"Overall":5.0,"Value":4.0,"Sleep Quality":4.0,"Rooms":5.0,"Location":5.0},"author":"Marcelle Haley","date":"2015-07-17 19:17:23 +0300"},{"content":"We had been at Istanbul for business purposes and used to stay at the well-known brands at Taxim. This time the trip was for leisure and we were looking for a charming, cozy, friendly, clean, staff friendly boutique hotel. So, THIS IS TomTom suites and we consider ourselves very lucky to stay there. Due to the Italian embassy its socak is not crowded and safe 24/7. Breakfast variety was very satisfactory, view from terrace wonderful, staff very friendly, very quiet and clean. If you are looking for a hotel with ID, we strongly recommend it. Be aware that Istiklal street is only 5 minute walking, but uphill.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Location":4.0,"Rooms":5.0},"author":"Peggie Little","date":"2014-04-22 04:05:24 +0300"}],"public_likes":["Ms. Braulio Kuhic"],"vacancy":true,"description":"Modern, stylish contemporary serviced apartments 4 miles for Edinburgh's city Centre.","alias":"Serviced Apartments","pets_ok":false,"free_breakfast":true,"free_internet":false,"free_parking":true} +{"title":"Edinburgh/Old Town","name":"Euro Hostel Edinburgh","address":null,"directions":null,"phone":"+44 8454 900 461","tollfree":null,"email":null,"fax":null,"url":"http://www.euro-hostels.co.uk/Edinburgh_hostel/","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.94825,"lon":-3.18805,"accuracy":"APPROXIMATE"},"type":"hotel","id":8661,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"A plain and simple hotel, located on a busy street with many company offices nearby I can't imagine a tourist staying here. It is far away from the city center, and anything that a tourist might be interested in. In general, you will need a car, or a taxi to get to anything unless your business office is within walking distance. About €25 from the airport via taxi, this fairly modern looking hotel is very plain and simple in desgin, layout and comforts. Functional, and at a rate of about €85 per night - it was almost a bargain. Surprisingly quite given the location on the street, my \"no smoking\" room was clearly smoked in. Upon complaining - it was explained to me that the room was in fact no-smoking. No offer to move me, no thought that there might be an issue .... who knows, perhaps they had heard this before. Breakfast included, a simple breakfast that was OK in general, and nothing special. I would only recommend staying here if you are close enough to what you need to go to that you can walk.","ratings":{"Service":3.0,"Cleanliness":2.0,"Overall":2.0,"Value":3.0,"Sleep Quality":2.0,"Rooms":2.0,"Location":1.0},"author":"Rachel O'Hara","date":"2014-03-12 14:29:07 +0300"},{"content":"We (2 couples) recently stayed at the Hotel for 4 nights in their standard rooms -- 1 room off the garden and 1 in the main building.Both rooms were on the small side but the quality of the rooms more than compensated for their size.Breakfast was excellent.The hotels main asset in our eyes were all their staff who were very professional and looked after all our needs exceptionally well.The location of the hotel is excellent close to shops sights and restaurants. If you are travelling to Paris for a short trip i would recommend staying here.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":4.0,"Value":4.0,"Sleep Quality":5.0,"Rooms":3.0,"Location":5.0},"author":"Viola Reinger","date":"2012-09-21 02:53:29 +0300"}],"public_likes":["Laila Jacobs","Dr. Andreane Berge","Ophelia Walter","Mac Hackett","Belle Bartell","Spencer Erdman","Elna Monahan","Shanelle Hayes","Ms. Wallace Larkin"],"vacancy":false,"description":"Kincaid's Court, Guthrie Street. In Cowgate, open every summer from June 8th until September 2nd. Budget accommodation in 43 apartments used as student residences during term time.","alias":null,"pets_ok":false,"free_breakfast":false,"free_internet":true,"free_parking":true} +{"title":"Edinburgh/Old Town","name":"The Sheraton Grand Hotel","address":null,"directions":null,"phone":"+44 131 229 9131","tollfree":null,"email":null,"fax":null,"url":null,"checkin":null,"checkout":null,"price":null,"geo":{"lat":55.947,"lon":-3.2073,"accuracy":"APPROXIMATE"},"type":"hotel","id":8662,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"Really loved this hotel, it was beautifully decorated (very much interior designed) and was spotlessly clean. We had a booked a superior double and when we arrived we were told we'd been upgraded. The room was fairly large by Paris standards and had a day bed, which I assume could sleep a 3rd person/child. The bathroom was really modern and had a bath and large separate shower with large overhead shower head, plus hand held shower. All the decor was tasteful and we were at the back of the hotel overlooking the small courtyard garden, so very quiet, although the hotel is on a quiet street anyway. Short walk to the Jardin du Luxembourg and less that 5 mins to a metro, which was on a direct line to the Gare du Nord, so perfect for us as we took the Eurostar from London. Hotel booked us a table at a great restaurant, superb food, which they recommended. Breakfast was served to order and you got croissants, pain au chocolat, bread and could choose omelettes etc. The fruit salad was freshly made, the yoghurts were the posh ones in glass jars. Would really recommend the hotel, but not for small children - there are a lot of carefully placed vases and objets d'art that little fingers will want to touch...","ratings":{"Cleanliness":5.0,"Sleep Quality":5.0,"Overall":5.0,"Value":4.0,"Service":5.0},"author":"Brittany Ledner Jr.","date":"2012-06-21 07:48:16 +0300"},{"content":"I spent a wonderful week at the Villa Madame, finding the staff very helpful and gracious. My 5th floor room was very clean and light, quiet, and comfortable. The hotel is extremely well located only a few short blocks to the metro, Jardins du Luxemburg, and shopping. The hotel offered free wifi, and much more surprisingly, free international telephone service from my room via voip. Breakfasts were excellent, and the garden area was quiet and comfortable. Just around the corner is Maison du Jardin, a very excellent small restaurant with prix fixe 31 euro dinners. Staff made other great recommendations as well. I would unhesitatingly recommend and will return.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":4.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Lauren Ortiz","date":"2014-10-01 14:04:03 +0300"},{"content":"My friend and I were backbacking through London and Paris and decided to splurge a little on this hotel. It turned out amazing. You really don't want to risk a hostel in Paris, they are serious dumps. I did a hostel in Paris in 2007. I picked this hotel due to it's proximity to nightlife in the Latin Quarter and it looked nice. The bathroom had a walk in shower with glass door. Flatscreen TV had an aux speaker in the bathroom to hear the tv while doing your thing. Beds were very comfortable and the room we got was big and facing the street. The street was loud during mid afternoon due to the Catholic school playground down the block. We booked a room with two singles and when we went to check in we were told they had inadvertantly given our room away. I started to freak because I was sure we would get a fast one pulled as usually happens in western europe. We were happy that after my face of death look we were upgraded to a larger room with two double beds. Unfortunately for the poor lady who had booked the room they bumped her and I don't know where she was then moved. Overall the staff was slow on check'in. Breakfast was carbs and coffee. They have a person dedicated only to breakfast and coffee however it seems odd because this is a very small hotel. We were the youngest guests in the hotel by about 50 years. Everyone else looked like they were late fifties to mid seventee's. We are single thirtysomethings. I would recommend this hotel to anyone who wants a peacefull nights sleep in Paris.","ratings":{"Service":4.0,"Cleanliness":5.0,"Overall":4.0,"Value":2.0,"Location":5.0,"Rooms":4.0},"author":"Turner Ferry","date":"2013-11-29 14:38:35 +0300"},{"content":"The Villa Madame is a lovely, comfortable, well-located boutique hotel with excellent service. My wife and I enjoyed our stay there immensely. This hotel is chic and luxuriously comfortable especially for the price. Admittedly, the room we had (Classic Double Room) was the smallest hotel room my wife and I had ever seen at about 3m x 3.5m (10' x 12'). The large outdoor terrace, fantastic bathroom (with Hermès products and great water pressure), deliciously comfortable bed, iPod docking station, super-fast free wi-fi and excellent service more than made up for the tiny room. Alex at the front desk was a delight as she answered our every question and was always happy to chat and share information. The location is excellent being only a few minutes walk from either the Rennes or St-Sulpice Metro stops, 100m from the beautiful Jardin du Luxembourg and within easy walking distance of the myriad of shops and restaurants on both the Rue de Rennes and Boulevard Saint-Germain. The hotel serves a decent continental breakfast which seems expensive at 18€ but we found a package that was below the normal rate and included breakfast. The breakfast room, as other reviewers have noted, is small and has only four tables for two. All of the tables were occupied each time we went for breakfast but the hotel happily served us as we sat in the little lounge area so it wasn't ever a problem. They don't serve lunch or dinner but there are two brasseries within 50m of the front door and many more restaurants and coffee shops just a few blocks away. There were a few other minor concerns that I had regarding this hotel: - The minibar is stocked only with two bottles of water. It would be nice if there was a limited selection of other beverages and also a kettle for tea/coffee service. - The satellite television has almost 900 channels (yes, I went through them all late one night) but half of them are Arabic and almost all of the rest are in French save for one German language station, a couple of Italian stations and only Bloomberg for the English speakers. Although one doesn't go to Paris to watch TV in the room, it's nice to relax and watch the news or a movie after a long day on the town but unless the above suits you, you won't have much cause to even turn on the set. - The street is very small and it is very difficult to find parking. This is likely not a problem for most visitors but is something to keep in mind especially if you rent a car. Overall, my wife and I loved this hotel. The few cons are heavily outweighed by the comforts we enjoyed. We are very seasoned travelers and despite the tiny room, this hotel experience was one of our best ever. We will definitely stay at the Villa Madame again.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Location":5.0,"Rooms":5.0},"author":"Blaze Williamson","date":"2014-04-26 13:59:54 +0300"}],"public_likes":["Madelynn Littel","Marielle Daugherty","Micah Stiedemann","Sandra Howe","Angela Oga"],"vacancy":true,"description":"21 Festival Square. Against the backdrop of majestic Edinburgh Castle, the Sheraton Grand Hotel and Spa combines city centre convenience with warm Scottish hospitality.","alias":null,"pets_ok":false,"free_breakfast":true,"free_internet":false,"free_parking":false} +{"title":"Edinburgh/Old Town","name":"Radisson Blu Hotel","address":"80 High St","directions":null,"phone":"+44 131 557 9797","tollfree":null,"email":null,"fax":null,"url":"http://www.radissonblu.co.uk/hotel-edinburgh","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.95014,"lon":-3.18667,"accuracy":"ROOFTOP"},"type":"hotel","id":8663,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[],"public_likes":[],"vacancy":false,"description":"The Royal Mile. Less than a five-minute walk from major shopping and business districts, and the Edinburgh International Conference Centre is only a short taxi ride away.","alias":null,"pets_ok":false,"free_breakfast":true,"free_internet":true,"free_parking":false} +{"title":"Edinburgh/Old Town","name":"Hotel Missoni","address":null,"directions":null,"phone":"+44 131 220 6666","tollfree":null,"email":null,"fax":null,"url":"http://www.hotelmissoni.com/hotelmissoni-edinburgh","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.9491,"lon":-3.19275,"accuracy":"APPROXIMATE"},"type":"hotel","id":8664,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"We stayed for 3 nights in March, 2 consecutive and then one at the end of the trip. The area near the Termini station is not the prettiest, but it is very convenient if you are using public transportation. My suggestion when exiting Termini station is to go RIGHT and walk down about 4 blocks and then right again and over 2 blocks. We were travelling with quite a bit of luggage and we ended up going the wrong way too many times. We had no problems in the area and loved the convenience of the location. We walked to Termini to either pick up the Metro train or a bus to most all of the sites. The room was spotless and the Breakfast was delicious. Assunta - the owner, could not have been more helpful, although her sense of direction is not like ours in America. Just a short walk maybe a lot longer than we Americans are used to. Hoping on the #70 bus gets you to almost any tourist site (or close to) and most buses all head back to Termini.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":4.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":3.0,"Location":3.0},"author":"Hermina Schinner","date":"2015-03-06 22:56:15 +0300"},{"content":"This motel may not be the Ritz Carlton but if your looking for value, a great location and safe neighborhood then you've chosen the right place! It is right across the street from the CBS studios, the line for the price is right is just around the corner. My son and I stayed there for a week and received great service from the maids right on to the front desk. The front desk was more helpful than any concierge I have ever seen and they are just a wonderful hard working family. If I ever go back to LA I will definitely stay at the Beverly Inn even if it's only for the friendly service.","ratings":{"Service":5.0,"Cleanliness":4.0,"Overall":4.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":4.0,"Location":5.0},"author":"Jose Swaniawski Sr.","date":"2014-02-12 20:15:16 +0300"},{"content":"This place is car motel that has seen much better days; the beds are old and offer no support, the televisions, carpets, and furnishings are likewise well-used, and the overall effect can be somewhat depressing. However, it does offer limited parking and convenient location at a very inexpensive rate and the bathrooms were clean; it attracts tourists who are more interested in the surrounding neighborhoods (which are quite nice with wonderful restaurants) and less interested in where they stay for the night. There was a cafe down the street when I was there in 2002, it's along major bus lines, with a terrific market close by across the street. It is also right across from the CBS studios, and about four blocks walking from an old-style diner and bakery. The comments on other sites focus on the ethnic background of the clerks and manager, but I found them pleasant and accommodating. That is why I actually felt safe there as a single female traveler (although I suspect others would not feel this way). This is just slightly above average for what you would expect for the price, but you do get what you pay for.","ratings":{"Cleanliness":1.0,"Overall":2.0,"Value":4.0,"Service":3.0,"Rooms":2.0},"author":"Mr. Ellis Heller","date":"2012-03-02 00:20:56 +0300"},{"content":"Dirty, cock roach infested, unsafe, and very noisy. This motel has not been updated in 25 years or more. It is very noisy (even with ear plugs) because you are a stone throw away from the Dolphin Expressway. The carpet and ceramic floors are filthy along with the furniture and bedding. Someone tried entering my room in the middle of the night, thank goodness for deadbolt and door chain. Photos online are very deceiving. RECOMMEND - DO NOT STAY AT THIS MOTEL. This motel should not be part of the Choice Hotel chain. Should be called \"Last Choice\".","ratings":{"Service":2.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0,"Location":1.0},"author":"Lottie Gerhold IV","date":"2014-06-12 22:56:30 +0300"},{"content":"Creepy, dirty, dark, depressing. It looks like the stereotypical motel in the movies where a drug deal goes bad and people get murdered. Rooms smell of chemical perfumed disinfectant and it burns your nose and lungs... but you will be too afraid for your safety to open a window or door! We stayed at this hotel from 3pm to 10pm (never actually slept overnight, thank god!) because we had a late night flight and it was pouring rain in Miami. The hotel says it's \"newly renovated\" and had free WIFI, so we figured it would be a nice place to spend a few hours and relax. HA! NOOOO! We laid on top of the beds, just to watch tv and instantly became itchy. Nothing about this hotel was \"newly renovated.\" The WIFI was extremely slow. The main picture of this hotel is deceiving. It looks a LOT worse and run-down in real life! Don't bother with this place.","ratings":{"Service":2.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0},"author":"Niko Keebler","date":"2014-12-12 12:32:09 +0300"},{"content":"We didn't stay at the hotel so I can't comment on the rooms. We did leave our car there while we went on a cruise ($5/night parking.) While we were gone someone siphoned ALL of the gas out of our vehicle. I have called the manager twice to alert her to the problem. I've left messages concerning \"a security issue at the hotel\" and no one has returned my call. I guess they don't care about security. Next time I will spend the $20/day to park at the port since that seems to be the only secure parking to be had.","ratings":{"Overall":1.0},"author":"Miss Alysha Goldner","date":"2012-01-17 00:40:45 +0300"},{"content":"I stayed at this motel for one night with my partner in August 2010. We had a flight early in the morning from Miami airport so we wanted a hotel close to the airport. I booked this through BOOKING.COM and payed £45. I have never stayed in Miami before so we did not know what areas were good and what was bad. When we checked in I wasa little concerned as there was a security hut and a guard at the door. We checked in and it was very run down and dirty. They asked for a credit card but I insisted on paying cash as there was no way I was going to hand over my card details. We parked our car and tried to find our room. The halls were all outdoors and very run down. There was a sign saying it had just had a refurb. I could not see where from the outside. It seemed that the area in Miami the motel was in was not a very good one, this concerned us a little. We finally found our room and went in. The room was very basic and smelt of damp. The dead lock on the door was broken and there was no safe in the room. We felt that uncomfortable that at night we pushed our suit cases up to the door. The shower had no presure and was only luke warm and the paint was peeling off from the bathroom. What really appauld us was the floor. I took my shoes and socks off and was walking around in bare foot. After two minutes my partner said \"LOOK AT YOUR FEET\". They were black. The floor was that dirty that in 2 mins my feet were black. I wet a towel and rubbed it along the floor. The towel changed to black and the carpet changed colour. The floor could do with a really good clean. The TV reception was very poor and fuzzy and we gave up in the end. Also the internet/Wifi had little/no signal. It was noisey outside and there were Police sirens sounding all night outside. We found it hard to sleep. We only stayed the night as we had to get up at 4 am to check in for our flight and we checked in at 8pm that evening. Otherwise we would have moved hotel. The only plus side was it was close to the airport. We did not stay for the breakfast but if itn was anything like the room we would have passed anyway. We did not use the pool as it was on the otherside of the motel right next to a main highway. It looked dirty and very uninviting. Please only stay at this motel if you have to or if it is free and you are feeling brave.","ratings":{"Service":1.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":2.0,"Location":3.0},"author":"Miss Weldon Flatley","date":"2015-05-21 14:38:02 +0300"}],"public_likes":["Ms. Jaleel Bartell","Rodger Jerde","Hanna Simonis"],"vacancy":false,"description":"1 George IV Bridge. Situated on the Royal Mile and designed by Rosita Missoni.","alias":null,"pets_ok":true,"free_breakfast":true,"free_internet":false,"free_parking":false} +{"title":"Edinburgh/South","name":"Argyle Backpackers","address":"14 Argyle Pl","directions":"The number 41 bus (catch it outside Waverley railway station) goes right past the front door.","phone":"+44 131 667 9991","tollfree":null,"email":null,"fax":null,"url":"http://www.argyle-backpackers.co.uk/","checkin":null,"checkout":null,"price":"Dorm from £13","geo":{"lat":55.9385,"lon":-3.1912,"accuracy":"ROOFTOP"},"type":"hotel","id":8685,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"Made a one night reservation at this \"hotel\" without checking the reviews, big mistake. The hotel looks OK from the highway and looked conveniently located. Should have known when the very unfriendly girl at the front desk gave me the room key wrapped in a post-it note because they had run out of the little envelopes. The room smelled weird, when we opened the chest drawers we saw what looked like small roaches, we never took anything out of the suitcase. It was very cold that night in Miami and the heating unit blew cold air. The next morning we woke up to find out there was no hot water in the whole hotel, after many calls and trips to the front desk I was informed that there was someone coming to fix the boiler. To make a long story short, the hot water came back on at 12:00pm; checkout time is 11:00am. They wanted us to leave without taking a shower, the maid was also annoyed at us because we didn't leave the room and she had to finish to go home. Later that night while at a restaurant in South Beach, I noticed some itchy bumps on the left side of the back of my neck and scalp, as well as the knuckles of my fingers, they turned out to be BED BUG BITES!!!","ratings":{"Service":1.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0,"Location":4.0},"author":"Consuelo Thiel","date":"2013-03-28 07:41:35 +0300"},{"content":"Restaurant service was o.k. Your continental breakfast is a joke. We were there 2 nights. We had 4 rooms which were reserved in August 2008. The last couple to get there Friday niight got a terrible room. Nothing in it but the bed. They wre given a different room the next morning but the damage was done. No we will not be back.","ratings":{"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Service":1.0,"Rooms":1.0},"author":"Ettie Bartell","date":"2012-09-04 06:14:54 +0300"},{"content":"Disappointing, after all these years... I have been staying at the Capri since 2001, when my family moved from SF to the North Bay. It used to be a great deal for us ex-pats and while not a luxury hotel, it was clean and comfortable (plus in a great location).My son and I stayed at the Capri this summer and we were terribly let down. They no longer offer specials and while there is renovation occurring, our room was dingy and the bed very uncomfortable. My 9 year old son said, Mom, let's not stay at the Capri anymore. Sadly, I had to agree.","ratings":{"Service":3.0,"Business service":-1.0,"Cleanliness":2.0,"Check in / front desk":3.0,"Overall":2.0,"Value":2.0,"Rooms":2.0,"Location":4.0},"author":"Harry O'Kon I","date":"2014-09-10 11:07:33 +0300"},{"content":"Best Deal in the Marina If you don't mind 1960's decor this place will fit the bill. It's very reasonable and very clean. Sometimes it books up with the Euro tours. Ask to stay on the 3rd floor, with the high ceilings and roof windows. I have actually heard the fog horns at night. There is also plenty of free parking right on-site.This hotel is right off Union street and within walking distance of the Marina Green. Tons of great restaurants and clubs. One of my favorites is the Brazen Head, which is right across the street. A small English Pub with great food and drinks. It's hard to spot but there's a real small sign out front. Also, El Canasta (sp?) has a great steak burrito.","ratings":{"Service":-1.0,"Business service":-1.0,"Cleanliness":-1.0,"Check in / front desk":-1.0,"Overall":4.0,"Value":-1.0,"Rooms":-1.0,"Location":-1.0},"author":"Sibyl Lind","date":"2014-09-20 18:19:21 +0300"},{"content":"Bed Bugs and Ants The ants didn't bother me. It was the bed bugs I detested.Before staying at the Buena Vista I didn't know what a bed bug looked. But the spots on my arms that looked like flea bites kept appearing after I got home. So I googled them.Yep, sure enough! Up popped a photo of the same type of bug that I had killed after I found it crawling on my husband's pillow while we were staying at the Buena Vista this January 2008!I ordered some all natural bed bug powder to dust all over my house. Even my kids are showing up with the spots.When I called the hotel after I got home to tell them about it they said they would block off that room. But when I called a day or two later there was someone in that room. It doesn't matter if they block off and treat one room. They've got to do the entire premises.","ratings":{"Service":5.0,"Business service":-1.0,"Cleanliness":1.0,"Check in / front desk":5.0,"Overall":2.0,"Value":5.0,"Rooms":1.0,"Location":5.0},"author":"Deshawn Rippin","date":"2014-10-17 15:46:51 +0300"},{"content":"Not a great place to stay This place is falling apart...was once a nice little place to stay but not now. Lobby was shabby and dirty, room was not better...there was mold in the tub, no movies here to buy on cable, iron in room was broke....etc. For the $160 they charged per night ( with a AAA card), I would not go back...there is a very nice small, totally remodeled motel two blocks down that we should have stayed at for the same price and most definitly will next time...It is called Hotel Del Sol...check it out, thehoteldelsol.com....much, much better for the $$$$.","ratings":{"Service":2.0,"Business service":1.0,"Cleanliness":1.0,"Check in / front desk":3.0,"Overall":2.0,"Value":1.0,"Rooms":2.0,"Location":4.0},"author":"Wayne Tremblay III","date":"2012-10-27 01:48:16 +0300"},{"content":"Great Budget Accommodation If you want a small budget sized no frills hotel that offers a resonable level of service then this is for you. The rooms were large and the housekeeping very good. The front desk service was always helpful and friendly with good advice. Traffic noise was not as bad as expected and nor was the beds. Only minus was the continental breakfast American style which differed somewhat from what we experienced in other countries. Too sweet for our taste.Overall a great experience and well located although being closer to eateries would have been appreciated. Can recommend the Liquor Store accross the road. Their staff were great!","ratings":{"Service":3.0,"Business service":-1.0,"Cleanliness":3.0,"Check in / front desk":-1.0,"Overall":3.0,"Value":3.0,"Rooms":4.0,"Location":-1.0},"author":"Margaretta Miller","date":"2012-04-19 20:46:49 +0300"}],"public_likes":["Narciso Wiegand","Graciela Bailey","Kavon Bruen","Aditya Feest","Caleb Medhurst","Ross Rippin","Germaine Kunde"],"vacancy":true,"description":"Two good self-catering kitchens, garden, conservatory/seating area, choice of different sized dorms, and private rooms.Definitely not a party hostel.","alias":null,"pets_ok":true,"free_breakfast":true,"free_internet":true,"free_parking":false} +{"title":"Abbeville","name":"Chez Mel","alt":null,"address":"63-65 rue Saint-Vulfran","directions":null,"phone":"+33 3 22 19 48 64","tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"With an old style setting and musical accompaniment, this is a hearty and family-friendly crêpe restaurant. It is also a tea room in the afternoon.","geo":{"lat":50.104437,"lon":1.829432,"accuracy":"RANGE_INTERPOLATED"},"activity":"eat","type":"landmark","id":33,"country":"France","city":"Abbeville","state":"Picardie"} +{"title":"Aberdour","name":"Aberdour Castle","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":"http://www.historic-scotland.gov.uk/propertyresults/propertyoverview.htm?PropID=PL_001","hours":null,"image":null,"price":null,"content":"Is a fascinating, 12th-century castle which was granted by Robert the Bruce to his friend and nephew, Thomas Randolph, Earl of Moray. It includes the beautiful and well-maintained castle gardens, as well as a spectacular beehive-shaped dovecot built at the end of the sixteenth century.","geo":{"lat":56.0552,"lon":-3.2985,"accuracy":"APPROXIMATE"},"activity":"see","type":"landmark","id":35,"country":"United Kingdom","city":"Aberdour","state":null} +{"title":"Aberdour","name":"The Silver Sands Beach","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"is one of Scotland's seven Blue Flag awarded beaches, and is incredibly popular in summer time. For those after a bit of peace and quiet, the '''Black Sands Beach''' may be more to your tastes.","geo":{"lat":56.0544,"lon":-3.2863,"accuracy":"ROOFTOP"},"activity":"see","type":"landmark","id":36,"country":"United Kingdom","city":"Aberdour","state":null} +{"title":"Aberdour","name":"Aberdour Railway Station","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"is a beautifully kept and cared for example of a traditional station, and regularly wins the "Best Station and Gardens in Great Britain" award.","geo":{"lat":56.05471,"lon":-3.30089,"accuracy":"RANGE_INTERPOLATED"},"activity":"see","type":"landmark","id":37,"country":"United Kingdom","city":"Aberdour","state":null} \ No newline at end of file From 54590cc8d75d17b74061cda9a8d9220e8a04e344 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 22 Jun 2022 15:14:48 +0530 Subject: [PATCH 10/16] updated unit tests --- index_test.go | 275 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 222 insertions(+), 53 deletions(-) diff --git a/index_test.go b/index_test.go index ad5c52711..6933ec188 100644 --- a/index_test.go +++ b/index_test.go @@ -230,6 +230,17 @@ func TestCrud(t *testing.T) { } } +func approxSame(actual, expected uint64) bool { + modulus := func(a, b uint64) uint64 { + if a > b { + return a - b + } + return b - a + } + + return float64(modulus(actual, expected))/float64(expected) < float64(0.25) +} + func TestBytesRead(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) defer cleanupTmpIndexPath(t, tmpIndexPath) @@ -240,12 +251,17 @@ func TestBytesRead(t *testing.T) { documentMapping := NewDocumentMapping() indexMapping.AddDocumentMapping("hotel", documentMapping) indexMapping.StoreDynamic = false - FieldMapping := NewTextFieldMapping() - FieldMapping.Store = false - documentMapping.AddFieldMappingsAt("reviews.content", FieldMapping) - FieldMapping = NewTextFieldMapping() - FieldMapping.Store = true - documentMapping.AddFieldMappingsAt("type", FieldMapping) + indexMapping.DocValuesDynamic = false + contentFieldMapping := NewTextFieldMapping() + contentFieldMapping.Store = false + + reviewsMapping := NewDocumentMapping() + reviewsMapping.AddFieldMappingsAt("content", contentFieldMapping) + documentMapping.AddSubDocumentMapping("reviews", reviewsMapping) + + typeFieldMapping := NewTextFieldMapping() + typeFieldMapping.Store = false + documentMapping.AddFieldMappingsAt("type", typeFieldMapping) idx, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil) if err != nil { t.Fatal(err) @@ -258,22 +274,10 @@ func TestBytesRead(t *testing.T) { } }() - file, err := os.Open("sample.txt") - scanner := bufio.NewScanner(file) - batch := idx.NewBatch() - - type docStructure map[string]interface{} - - for scanner.Scan() { - var doc docStructure - docContent := (scanner.Text()) - json.Unmarshal([]byte(docContent), &doc) - err = batch.Index(fmt.Sprintf("%d", doc["id"]), doc) - if err != nil { - t.Fatalf("failed to create batch %v\n", err) - } + batch, err := getBatchFromData(idx, "sample.txt") + if err != nil { + t.Fatalf("failed to form a batch") } - err = idx.Batch(batch) if err != nil { t.Fatalf("failed to index batch %v\n", err) @@ -281,50 +285,63 @@ func TestBytesRead(t *testing.T) { query := NewQueryStringQuery("united") searchRequest := NewSearchRequestOptions(query, int(10), 0, true) - res, err := idx.Search(searchRequest) + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ := idx.StatsMap()["index"].(map[string]interface{}) prevBytesRead, _ := stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", prevBytesRead) + if prevBytesRead != 28618 { + t.Fatalf("expected bytes read for query string 28618, got %v", + prevBytesRead) + } - // checking for reusability - res, err = idx.Search(searchRequest) + // subsequent queries on the same field results in lesser amount + // of bytes read because the segment static and dictionary is reused and not + // loaded from mmap'd filed + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ := stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 23 { + t.Fatalf("expected bytes read for query string 23, got %v", + bytesRead-prevBytesRead) + } prevBytesRead = bytesRead - fmt.Printf("res hits %v\n", len(res.Hits)) - fuzz := NewFuzzyQuery("unitd") + fuzz := NewFuzzyQuery("hotel") + fuzz.FieldVal = "reviews.content" fuzz.Fuzziness = 2 searchRequest = NewSearchRequest(fuzz) - res, err = idx.Search(searchRequest) + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } - fmt.Printf("res hits %v\n", len(res.Hits)) stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v %v\n", stats["num_bytes_used_disk_by_root"], bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 16556 { + t.Fatalf("expected bytes read for fuzzy query is 16176, got %v\n", + bytesRead-prevBytesRead) + } prevBytesRead = bytesRead typeFacet := NewFacetRequest("type", 2) query = NewQueryStringQuery("united") searchRequest = NewSearchRequestOptions(query, int(0), 0, true) searchRequest.AddFacet("types", typeFacet) - res, err = idx.Search(searchRequest) + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + if !approxSame(bytesRead-prevBytesRead, 259) { + t.Fatalf("expected bytes read for faceted query is 259, got %v", + bytesRead-prevBytesRead) + } prevBytesRead = bytesRead min := float64(8000) @@ -332,70 +349,222 @@ func TestBytesRead(t *testing.T) { numRangeQuery := NewNumericRangeQuery(&min, &max) numRangeQuery.FieldVal = "id" searchRequest = NewSearchRequestOptions(numRangeQuery, int(10), 0, true) - res, err = idx.Search(searchRequest) + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 1678 { + t.Fatalf("expected bytes read for numeric range query is 1678, got %v", + bytesRead-prevBytesRead) + } prevBytesRead = bytesRead searchRequest = NewSearchRequestOptions(query, int(10), 0, true) - searchRequest.IncludeLocations = true - res, err = idx.Search(searchRequest) + searchRequest.Highlight = &HighlightRequest{} + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 676 { + t.Fatalf("expected bytes read for query with highlighter is 676, got %v", + bytesRead-prevBytesRead) + } prevBytesRead = bytesRead - searchRequest = NewSearchRequestOptions(query, int(10), 0, true) - searchRequest.Fields = []string{"type"} - res, err = idx.Search(searchRequest) + disQuery := NewDisjunctionQuery(NewMatchQuery("united"), NewMatchQuery("hotel")) + searchRequest = NewSearchRequestOptions(disQuery, int(10), 0, true) + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } + // expectation is that the bytes read is roughly equal to sum of sub queries in + // the disjunction query plus sum static value + stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + if bytesRead-prevBytesRead != 77 { + t.Fatalf("expected bytes read for disjunction query is 77, got %v", + bytesRead-prevBytesRead) + } +} + +func getBatchFromData(idx Index, fileName string) (*Batch, error) { + file, err := os.Open(fileName) + defer file.Close() + scanner := bufio.NewScanner(file) + batch := idx.NewBatch() + + type docStructure map[string]interface{} + + for scanner.Scan() { + var doc docStructure + docContent := (scanner.Text()) + json.Unmarshal([]byte(docContent), &doc) + err = batch.Index(fmt.Sprintf("%d", doc["id"]), doc) + if err != nil { + return nil, err + } + } + return batch, err +} + +func TestBytesReadStored(t *testing.T) { + tmpIndexPath := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath) + + indexMapping := NewIndexMapping() + indexMapping.TypeField = "type" + indexMapping.DefaultAnalyzer = "en" + documentMapping := NewDocumentMapping() + indexMapping.AddDocumentMapping("hotel", documentMapping) + + indexMapping.DocValuesDynamic = false + indexMapping.StoreDynamic = false + + contentFieldMapping := NewTextFieldMapping() + contentFieldMapping.Store = true + contentFieldMapping.IncludeInAll = false + contentFieldMapping.IncludeTermVectors = false + + reviewsMapping := NewDocumentMapping() + reviewsMapping.AddFieldMappingsAt("content", contentFieldMapping) + documentMapping.AddSubDocumentMapping("reviews", reviewsMapping) + + typeFieldMapping := NewTextFieldMapping() + typeFieldMapping.Store = false + typeFieldMapping.IncludeInAll = false + typeFieldMapping.IncludeTermVectors = false + documentMapping.AddFieldMappingsAt("type", typeFieldMapping) + idx, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil) + if err != nil { + t.Fatal(err) + } + batch, err := getBatchFromData(idx, "sample.txt") + if err != nil { + t.Fatalf("failed to form a batch %v\n", err) + } + err = idx.Batch(batch) + if err != nil { + t.Fatalf("failed to index batch %v\n", err) + } + query := NewTermQuery("hotel") + query.FieldVal = "reviews.content" + searchRequest := NewSearchRequestOptions(query, int(10), 0, true) + _, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ := idx.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + if bytesRead != 15792 { + t.Fatalf("expected the bytes read stat to be around 15792, got %v", err) + } + prevBytesRead := bytesRead + + searchRequest = NewSearchRequestOptions(query, int(10), 0, true) + _, err = idx.Search(searchRequest) + if err != nil { + t.Error(err) + } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) + if bytesRead-prevBytesRead != 15 { + t.Fatalf("expected the bytes read stat to be around 15, got %v", err) + } prevBytesRead = bytesRead searchRequest = NewSearchRequestOptions(query, int(10), 0, true) - searchRequest.Fields = []string{"type"} - res, err = idx.Search(searchRequest) + searchRequest.Fields = []string{"*"} + _, err = idx.Search(searchRequest) if err != nil { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read %v\n", bytesRead-prevBytesRead) - prevBytesRead = bytesRead - disQuery := NewDisjunctionQuery(NewMatchQuery("hotel"), NewMatchQuery("united")) - searchRequest = NewSearchRequestOptions(disQuery, int(10), 0, true) - res, err = idx.Search(searchRequest) + if bytesRead-prevBytesRead != 38278 { + t.Fatalf("expected the bytes read stat to be around 38278, got %v", err) + } + idx.Close() + cleanupTmpIndexPath(t, tmpIndexPath) + + // same type of querying but on field "type" + contentFieldMapping.Store = false + typeFieldMapping.Store = true + + tmpIndexPath1 := createTmpIndexPath(t) + defer cleanupTmpIndexPath(t, tmpIndexPath1) + + idx1, err := NewUsing(tmpIndexPath1, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil) + if err != nil { + t.Fatal(err) + } + defer func() { + err := idx1.Close() + if err != nil { + t.Fatal(err) + } + }() + + batch, err = getBatchFromData(idx1, "sample.txt") + if err != nil { + t.Fatalf("failed to form a batch %v\n", err) + } + err = idx1.Batch(batch) + if err != nil { + t.Fatalf("failed to index batch %v\n", err) + } + + query = NewTermQuery("hotel") + query.FieldVal = "type" + searchRequest = NewSearchRequestOptions(query, int(10), 0, true) + _, err = idx1.Search(searchRequest) if err != nil { t.Error(err) } - stats, _ = idx.StatsMap()["index"].(map[string]interface{}) + stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) - fmt.Printf("bytes read dis %v\n", bytesRead-prevBytesRead) + if bytesRead != 167 { + t.Fatalf("expected the bytes read stat to be around 167, got %v", bytesRead-prevBytesRead) + } prevBytesRead = bytesRead - t.Errorf("erorr") + _, err = idx1.Search(searchRequest) + if err != nil { + t.Error(err) + } + stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + if bytesRead-prevBytesRead != 12 { + t.Fatalf("expected the bytes read stat to be around 12, got %v", err) + } + prevBytesRead = bytesRead + + searchRequest.Fields = []string{"*"} + _, err = idx1.Search(searchRequest) + if err != nil { + t.Error(err) + } + + stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) + bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + + if bytesRead-prevBytesRead != 646 { + t.Fatalf("expected the bytes read stat to be around 646, got %v", err) + } } func TestIndexCreateNewOverExisting(t *testing.T) { tmpIndexPath := createTmpIndexPath(t) - defer cleanupTmpIndexPath(t, tmpIndexPath) index, err := New(tmpIndexPath, NewIndexMapping()) if err != nil { From 14beae40aafdcf97a6b6e9cbe4621351aa8a9b99 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Thu, 30 Jun 2022 18:15:58 +0530 Subject: [PATCH 11/16] code cleanup --- go.mod | 1 - go.sum | 2 - index/scorch/snapshot_index.go | 79 ++++++++++++++++++---------------- index_test.go | 2 +- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/go.mod b/go.mod index def97174c..2d0be65e9 100644 --- a/go.mod +++ b/go.mod @@ -26,6 +26,5 @@ require ( github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v0.0.5 go.etcd.io/bbolt v1.3.5 - golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 // indirect golang.org/x/text v0.3.7 ) diff --git a/go.sum b/go.sum index fb994c2d6..9623b9398 100644 --- a/go.sum +++ b/go.sum @@ -114,8 +114,6 @@ golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200202164722-d101bd2416d5/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6 h1:nonptSpoQ4vQjyraW20DXPAglgQfVnM9ZC6MmNLMR60= -golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 3d95106c9..13c0f2639 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -28,7 +28,7 @@ import ( "github.com/RoaringBitmap/roaring" "github.com/blevesearch/bleve/v2/document" index "github.com/blevesearch/bleve_index_api" - segmentl "github.com/blevesearch/scorch_segment_api/v2" + segment "github.com/blevesearch/scorch_segment_api/v2" "github.com/blevesearch/vellum" lev "github.com/blevesearch/vellum/levenshtein" bolt "go.etcd.io/bbolt" @@ -38,13 +38,13 @@ import ( var lb1, lb2 *lev.LevenshteinAutomatonBuilder type asynchSegmentResult struct { - dict segmentl.TermDictionary - dictItr segmentl.DictionaryIterator + dict segment.TermDictionary + dictItr segment.DictionaryIterator index int docs *roaring.Bitmap - postings segmentl.PostingsList + postings segment.PostingsList err error } @@ -60,6 +60,8 @@ var reflectStaticSizeIndexSnapshot int // in the kvConfig. var DefaultFieldTFRCacheThreshold uint64 = 10 +type bytesOffDiskStats segment.BytesOffDiskStats + func init() { var is interface{} = IndexSnapshot{} reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size()) @@ -140,21 +142,21 @@ func (i *IndexSnapshot) updateSize() { } func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, - makeItr func(i segmentl.TermDictionary) segmentl.DictionaryIterator, + makeItr func(i segment.TermDictionary) segment.DictionaryIterator, randomLookup bool) (*IndexSnapshotFieldDict, error) { results := make(chan *asynchSegmentResult) for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { var prevBytesRead uint64 - if seg, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if seg, ok := segment.segment.(bytesOffDiskStats); ok { prevBytesRead = seg.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { - if seg, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if seg, ok := segment.segment.(bytesOffDiskStats); ok { atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, seg.BytesRead()-prevBytesRead) } @@ -209,7 +211,7 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, } func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) { - return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(nil, nil, nil) }, false) } @@ -237,7 +239,7 @@ func calculateExclusiveEndFromInclusiveEnd(inclusiveEnd []byte) []byte { func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) { - return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { endTermExclusive := calculateExclusiveEndFromInclusiveEnd(endTerm) return i.AutomatonIterator(nil, startTerm, endTermExclusive) }, false) @@ -264,7 +266,7 @@ func calculateExclusiveEndFromPrefix(in []byte) []byte { func (i *IndexSnapshot) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) { termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix) - return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(nil, termPrefix, termPrefixEnd) }, false) } @@ -279,7 +281,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string, return nil, err } - return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) } @@ -307,7 +309,7 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string, prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg) } - return i.newIndexSnapshotFieldDict(field, func(i segmentl.TermDictionary) segmentl.DictionaryIterator { + return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator { return i.AutomatonIterator(a, prefixBeg, prefixEnd) }, false) } @@ -433,7 +435,7 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { rvd := document.NewDocument(id) var prevBytesRead uint64 - if seg, ok := i.segment[segmentIndex].segment.(segmentl.BytesOffDiskStats); ok { + if seg, ok := i.segment[segmentIndex].segment.(segment.BytesOffDiskStats); ok { prevBytesRead = seg.BytesRead() } err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { @@ -465,9 +467,8 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { if err != nil { return nil, err } - if seg, ok := i.segment[segmentIndex].segment.(segmentl.BytesOffDiskStats); ok { + if seg, ok := i.segment[segmentIndex].segment.(segment.BytesOffDiskStats); ok { delta := seg.BytesRead() - prevBytesRead - // log.Printf("stored field section %v\n", delta) atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, delta) } return rvd, nil @@ -529,10 +530,10 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.field = field rv.snapshot = is if rv.postings == nil { - rv.postings = make([]segmentl.PostingsList, len(is.segment)) + rv.postings = make([]segment.PostingsList, len(is.segment)) } if rv.iterators == nil { - rv.iterators = make([]segmentl.PostingsIterator, len(is.segment)) + rv.iterators = make([]segment.PostingsIterator, len(is.segment)) } rv.segmentOffset = 0 rv.includeFreq = includeFreq @@ -541,17 +542,17 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.currPosting = nil rv.currID = rv.currID[:0] if rv.dicts == nil { - rv.dicts = make([]segmentl.TermDictionary, len(is.segment)) + rv.dicts = make([]segment.TermDictionary, len(is.segment)) for i, segment := range is.segment { var prevBytesRead uint64 - if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if segP, ok := segment.segment.(bytesOffDiskStats); ok { prevBytesRead = segP.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } - if segP, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { + if segP, ok := segment.segment.(bytesOffDiskStats); ok { atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()-prevBytesRead) } rv.dicts[i] = dict @@ -560,8 +561,8 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, for i, segment := range is.segment { var prevBytesReadPL uint64 - if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - if postings, ok := rv.postings[i].(segmentl.BytesOffDiskStats); ok { + if _, ok := segment.segment.(bytesOffDiskStats); ok { + if postings, ok := rv.postings[i].(bytesOffDiskStats); ok { prevBytesReadPL = postings.BytesRead() } } @@ -571,20 +572,24 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, } rv.postings[i] = pl var prevBytesReadItr uint64 - if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok { + if _, ok := segment.segment.(bytesOffDiskStats); ok { + if itr, ok := rv.iterators[i].(bytesOffDiskStats); ok { prevBytesReadItr = itr.BytesRead() } } rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) - if _, ok := segment.segment.(segmentl.BytesOffDiskStats); ok { - if postings, ok := pl.(segmentl.BytesOffDiskStats); ok && prevBytesReadPL < postings.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, postings.BytesRead()-prevBytesReadPL) + if _, ok := segment.segment.(bytesOffDiskStats); ok { + if postings, ok := pl.(bytesOffDiskStats); ok && + prevBytesReadPL < postings.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + postings.BytesRead()-prevBytesReadPL) } - if itr, ok := rv.iterators[i].(segmentl.BytesOffDiskStats); ok && prevBytesReadItr < itr.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, itr.BytesRead()-prevBytesReadItr) + if itr, ok := rv.iterators[i].(bytesOffDiskStats); ok && + prevBytesReadItr < itr.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + itr.BytesRead()-prevBytesReadItr) } } } @@ -668,13 +673,13 @@ func docInternalToNumber(in index.IndexInternalID) (uint64, error) { func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( segmentIndex int, localDocNum uint64, fields []string, cFields []string, - visitor index.DocValueVisitor, dvs segmentl.DocVisitState) ( - cFieldsOut []string, dvsOut segmentl.DocVisitState, err error) { + visitor index.DocValueVisitor, dvs segment.DocVisitState) ( + cFieldsOut []string, dvsOut segment.DocVisitState, err error) { ss := i.segment[segmentIndex] var vFields []string // fields that are visitable via the segment - ssv, ssvOk := ss.segment.(segmentl.DocValueVisitable) + ssv, ssvOk := ss.segment.(segment.DocValueVisitable) if ssvOk && ssv != nil { vFields, err = ssv.VisitableDocValueFields() if err != nil { @@ -706,14 +711,14 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( if ssvOk && ssv != nil && len(vFields) > 0 { var prevBytesRead uint64 - if ssvp, ok := ssv.(segmentl.BytesOffDiskStats); ok { + if ssvp, ok := ssv.(segment.BytesOffDiskStats); ok { prevBytesRead = ssvp.BytesRead() } dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } - if ssvp, ok := ssv.(segmentl.BytesOffDiskStats); ok { + if ssvp, ok := ssv.(segment.BytesOffDiskStats); ok { atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, ssvp.BytesRead()-prevBytesRead) } } @@ -740,7 +745,7 @@ func (i *IndexSnapshot) DocValueReader(fields []string) ( type DocValueReader struct { i *IndexSnapshot fields []string - dvs segmentl.DocVisitState + dvs segment.DocVisitState currSegmentIndex int currCachedFields []string @@ -796,7 +801,7 @@ func (i *IndexSnapshot) DumpFields() chan interface{} { func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} { rv := make(map[string]struct{}, len(i.segment)) for _, segmentSnapshot := range i.segment { - if seg, ok := segmentSnapshot.segment.(segmentl.PersistedSegment); ok { + if seg, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { rv[seg.Path()] = struct{}{} } } @@ -808,7 +813,7 @@ func (i *IndexSnapshot) diskSegmentsPaths() map[string]struct{} { func (i *IndexSnapshot) reClaimableDocsRatio() float64 { var totalCount, liveCount uint64 for _, segmentSnapshot := range i.segment { - if _, ok := segmentSnapshot.segment.(segmentl.PersistedSegment); ok { + if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok { totalCount += uint64(segmentSnapshot.FullSize()) liveCount += uint64(segmentSnapshot.Count()) } diff --git a/index_test.go b/index_test.go index 6933ec188..67f86bdec 100644 --- a/index_test.go +++ b/index_test.go @@ -322,7 +322,7 @@ func TestBytesRead(t *testing.T) { stats, _ = idx.StatsMap()["index"].(map[string]interface{}) bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) if bytesRead-prevBytesRead != 16556 { - t.Fatalf("expected bytes read for fuzzy query is 16176, got %v\n", + t.Fatalf("expected bytes read for fuzzy query is 16556, got %v\n", bytesRead-prevBytesRead) } prevBytesRead = bytesRead From b1ef31f160107b9121f5a9d0c478f1d9de22a6e6 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 6 Jul 2022 09:53:45 +0530 Subject: [PATCH 12/16] refactoring and code cleanup --- index/scorch/merge.go | 4 +-- index/scorch/scorch.go | 8 ++--- index/scorch/snapshot_index.go | 55 +++++++++++++++--------------- index/scorch/snapshot_index_tfr.go | 5 +-- 4 files changed, 34 insertions(+), 38 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 259978e5d..0f996f274 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -355,7 +355,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context, } switch segI := seg.(type) { - case segment.BytesOffDiskStats: + case segment.DiskStatsReporter: totalBytesRead := segI.BytesRead() + prevBytesReadTotal segI.SetBytesRead(totalBytesRead) seg = segI.(segment.Segment) @@ -438,7 +438,7 @@ type segmentMerge struct { func cumulateBytesRead(sbs []segment.Segment) uint64 { rv := uint64(0) for _, seg := range sbs { - if segI, ok := seg.(segment.BytesOffDiskStats); ok { + if segI, ok := seg.(segment.DiskStatsReporter); ok { rv += segI.BytesRead() } } diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index f4c897c6d..a4df3d898 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -535,14 +535,10 @@ func (s *Scorch) Stats() json.Marshaler { return &s.stats } -func (s *Scorch) BytesRead() uint64 { +func (s *Scorch) BytesReadQueryTime() uint64 { return s.stats.TotBytesReadQueryTime } -func (s *Scorch) SetBytesRead(val uint64) { - atomic.StoreUint64(&s.stats.TotBytesReadQueryTime, val) -} - func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, uint64, uint64) { var numFilesOnDisk, numBytesUsedDisk, numBytesOnDiskByRoot uint64 @@ -602,7 +598,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["term_searchers_finished"] = m["TotTermSearchersFinished"] m["num_bytes_read_query_time"] = m["TotBytesReadQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] - m["num_analysis_bytes_indexed"] = m["TotIndexedAnalysisBytes"] + m["num_bytes_indexed_after_analysis"] = m["TotIndexedAnalysisBytes"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] m["num_recs_to_persist"] = m["TotItemsToPersist"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 13c0f2639..b242e36a3 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -60,7 +60,7 @@ var reflectStaticSizeIndexSnapshot int // in the kvConfig. var DefaultFieldTFRCacheThreshold uint64 = 10 -type bytesOffDiskStats segment.BytesOffDiskStats +type diskStatsReporter segment.DiskStatsReporter func init() { var is interface{} = IndexSnapshot{} @@ -149,14 +149,14 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { var prevBytesRead uint64 - if seg, ok := segment.segment.(bytesOffDiskStats); ok { + if seg, ok := segment.segment.(diskStatsReporter); ok { prevBytesRead = seg.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { - if seg, ok := segment.segment.(bytesOffDiskStats); ok { + if seg, ok := segment.segment.(diskStatsReporter); ok { atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, seg.BytesRead()-prevBytesRead) } @@ -435,7 +435,8 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { rvd := document.NewDocument(id) var prevBytesRead uint64 - if seg, ok := i.segment[segmentIndex].segment.(segment.BytesOffDiskStats); ok { + seg, ok := i.segment[segmentIndex].segment.(segment.DiskStatsReporter) + if ok { prevBytesRead = seg.BytesRead() } err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { @@ -467,7 +468,7 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { if err != nil { return nil, err } - if seg, ok := i.segment[segmentIndex].segment.(segment.BytesOffDiskStats); ok { + if ok { delta := seg.BytesRead() - prevBytesRead atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, delta) } @@ -541,18 +542,20 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.includeTermVectors = includeTermVectors rv.currPosting = nil rv.currID = rv.currID[:0] + if rv.dicts == nil { rv.dicts = make([]segment.TermDictionary, len(is.segment)) for i, segment := range is.segment { var prevBytesRead uint64 - if segP, ok := segment.segment.(bytesOffDiskStats); ok { + segP, ok := segment.segment.(diskStatsReporter) + if ok { prevBytesRead = segP.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } - if segP, ok := segment.segment.(bytesOffDiskStats); ok { + if ok { atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()-prevBytesRead) } rv.dicts[i] = dict @@ -561,36 +564,31 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, for i, segment := range is.segment { var prevBytesReadPL uint64 - if _, ok := segment.segment.(bytesOffDiskStats); ok { - if postings, ok := rv.postings[i].(bytesOffDiskStats); ok { - prevBytesReadPL = postings.BytesRead() - } + if postings, ok := rv.postings[i].(diskStatsReporter); ok { + prevBytesReadPL = postings.BytesRead() } pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) if err != nil { return nil, err } rv.postings[i] = pl + var prevBytesReadItr uint64 - if _, ok := segment.segment.(bytesOffDiskStats); ok { - if itr, ok := rv.iterators[i].(bytesOffDiskStats); ok { - prevBytesReadItr = itr.BytesRead() - } + if itr, ok := rv.iterators[i].(diskStatsReporter); ok { + prevBytesReadItr = itr.BytesRead() } rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) - if _, ok := segment.segment.(bytesOffDiskStats); ok { - if postings, ok := pl.(bytesOffDiskStats); ok && - prevBytesReadPL < postings.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, - postings.BytesRead()-prevBytesReadPL) - } + if postings, ok := pl.(diskStatsReporter); ok && + prevBytesReadPL < postings.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + postings.BytesRead()-prevBytesReadPL) + } - if itr, ok := rv.iterators[i].(bytesOffDiskStats); ok && - prevBytesReadItr < itr.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, - itr.BytesRead()-prevBytesReadItr) - } + if itr, ok := rv.iterators[i].(diskStatsReporter); ok && + prevBytesReadItr < itr.BytesRead() { + atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + itr.BytesRead()-prevBytesReadItr) } } atomic.AddUint64(&is.parent.stats.TotTermSearchersStarted, uint64(1)) @@ -711,14 +709,15 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( if ssvOk && ssv != nil && len(vFields) > 0 { var prevBytesRead uint64 - if ssvp, ok := ssv.(segment.BytesOffDiskStats); ok { + ssvp, ok := ssv.(segment.DiskStatsReporter) + if ok { prevBytesRead = ssvp.BytesRead() } dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } - if ssvp, ok := ssv.(segment.BytesOffDiskStats); ok { + if ok { atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, ssvp.BytesRead()-prevBytesRead) } } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index d66bafd75..d13c46c1f 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -77,7 +77,8 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // find the next hit for i.segmentOffset < len(i.iterators) { prevBytesRead := uint64(0) - if itr, ok := i.iterators[i.segmentOffset].(segment.BytesOffDiskStats); ok { + itr, ok := i.iterators[i.segmentOffset].(segment.DiskStatsReporter) + if ok { prevBytesRead = itr.BytesRead() } next, err := i.iterators[i.segmentOffset].Next() @@ -97,7 +98,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // this is because there are chances of having a series of loadChunk calls, // and they have to be added together before sending the bytesRead at this point // upstream. - if itr, ok := i.iterators[i.segmentOffset].(segment.BytesOffDiskStats); ok { + if ok { delta := itr.BytesRead() - prevBytesRead atomic.AddUint64(&i.snapshot.parent.stats.TotBytesReadQueryTime, uint64(delta)) } From 342884d3f21d8f8eb58535a1cc8228f85c1f957a Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Fri, 8 Jul 2022 09:59:59 +0530 Subject: [PATCH 13/16] updated the scorch_segment_api version --- go.mod | 2 +- go.sum | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 2d0be65e9..9d8e3f32b 100644 --- a/go.mod +++ b/go.mod @@ -11,7 +11,7 @@ require ( github.com/blevesearch/go-porterstemmer v1.0.3 github.com/blevesearch/goleveldb v1.0.1 github.com/blevesearch/gtreap v0.1.1 - github.com/blevesearch/scorch_segment_api/v2 v2.1.0 + github.com/blevesearch/scorch_segment_api/v2 v2.1.1 github.com/blevesearch/segment v0.9.0 github.com/blevesearch/snowball v0.6.1 github.com/blevesearch/snowballstem v0.9.0 diff --git a/go.sum b/go.sum index 9623b9398..28e57e109 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,9 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/blevesearch/mmap-go v1.0.3 h1:7QkALgFNooSq3a46AE+pWeKASAZc9SiNFJhDGF1NDx4= github.com/blevesearch/mmap-go v1.0.3/go.mod h1:pYvKl/grLQrBxuaRYgoTssa4rVujYYeenDp++2E+yvs= -github.com/blevesearch/scorch_segment_api/v2 v2.1.0 h1:NFwteOpZEvJk5Vg0H6gD0hxupsG3JYocE4DBvsA2GZI= github.com/blevesearch/scorch_segment_api/v2 v2.1.0/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE= +github.com/blevesearch/scorch_segment_api/v2 v2.1.1 h1:J8UDudUpDJz21d/hCMIshCeRordwnDTftgXcSDMUx40= +github.com/blevesearch/scorch_segment_api/v2 v2.1.1/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE= github.com/blevesearch/segment v0.9.0 h1:5lG7yBCx98or7gK2cHMKPukPZ/31Kag7nONpoBt22Ac= github.com/blevesearch/segment v0.9.0/go.mod h1:9PfHYUdQCgHktBgvtUOF4x+pc4/l8rdH0u5spnW85UQ= github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A= From aae6b86d99fb42f825ca19876338c2c75cb1821c Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Mon, 11 Jul 2022 12:27:30 +0530 Subject: [PATCH 14/16] refactoring and code cleanup --- index/scorch/merge.go | 2 +- index/scorch/scorch.go | 8 ++++---- index/scorch/snapshot_index.go | 17 +++++++++-------- index/scorch/snapshot_index_tfr.go | 2 +- index/scorch/stats.go | 6 +++--- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/index/scorch/merge.go b/index/scorch/merge.go index 0f996f274..d0b7c5535 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -436,7 +436,7 @@ type segmentMerge struct { } func cumulateBytesRead(sbs []segment.Segment) uint64 { - rv := uint64(0) + var rv uint64 for _, seg := range sbs { if segI, ok := seg.(segment.DiskStatsReporter); ok { rv += segI.BytesRead() diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index a4df3d898..1e69946db 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -401,7 +401,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) { analysisResults[itemsDeQueued] = result itemsDeQueued++ result.VisitFields(func(f index.Field) { - atomic.AddUint64(&s.stats.TotIndexedAnalysisBytes, + atomic.AddUint64(&s.stats.TotBytesIndexedAfterAnalysis, analysisBytes(f.AnalyzedTokenFrequencies())) }) } @@ -536,7 +536,7 @@ func (s *Scorch) Stats() json.Marshaler { } func (s *Scorch) BytesReadQueryTime() uint64 { - return s.stats.TotBytesReadQueryTime + return s.stats.TotBytesReadAtQueryTime } func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64, @@ -596,9 +596,9 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["index_time"] = m["TotIndexTime"] m["term_searchers_started"] = m["TotTermSearchersStarted"] m["term_searchers_finished"] = m["TotTermSearchersFinished"] - m["num_bytes_read_query_time"] = m["TotBytesReadQueryTime"] + m["num_bytes_read_query_time"] = m["TotBytesReadAtQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] - m["num_bytes_indexed_after_analysis"] = m["TotIndexedAnalysisBytes"] + m["num_bytes_indexed_after_analysis"] = m["TotBytesIndexedAfterAnalysis"] m["num_items_introduced"] = m["TotIntroducedItems"] m["num_items_persisted"] = m["TotPersistedItems"] m["num_recs_to_persist"] = m["TotItemsToPersist"] diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index b242e36a3..81c91d338 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -149,15 +149,16 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { var prevBytesRead uint64 - if seg, ok := segment.segment.(diskStatsReporter); ok { + seg, ok := segment.segment.(diskStatsReporter) + if ok { prevBytesRead = seg.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { - if seg, ok := segment.segment.(diskStatsReporter); ok { - atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, + if ok { + atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, seg.BytesRead()-prevBytesRead) } if randomLookup { @@ -470,7 +471,7 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { } if ok { delta := seg.BytesRead() - prevBytesRead - atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, delta) + atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, delta) } return rvd, nil } @@ -556,7 +557,7 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, return nil, err } if ok { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, segP.BytesRead()-prevBytesRead) + atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, segP.BytesRead()-prevBytesRead) } rv.dicts[i] = dict } @@ -581,13 +582,13 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, if postings, ok := pl.(diskStatsReporter); ok && prevBytesReadPL < postings.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, postings.BytesRead()-prevBytesReadPL) } if itr, ok := rv.iterators[i].(diskStatsReporter); ok && prevBytesReadItr < itr.BytesRead() { - atomic.AddUint64(&is.parent.stats.TotBytesReadQueryTime, + atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, itr.BytesRead()-prevBytesReadItr) } } @@ -718,7 +719,7 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( return nil, nil, err } if ok { - atomic.AddUint64(&i.parent.stats.TotBytesReadQueryTime, ssvp.BytesRead()-prevBytesRead) + atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, ssvp.BytesRead()-prevBytesRead) } } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index d13c46c1f..f452a8682 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -100,7 +100,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // upstream. if ok { delta := itr.BytesRead() - prevBytesRead - atomic.AddUint64(&i.snapshot.parent.stats.TotBytesReadQueryTime, uint64(delta)) + atomic.AddUint64(&i.snapshot.parent.stats.TotBytesReadAtQueryTime, uint64(delta)) } return rv, nil diff --git a/index/scorch/stats.go b/index/scorch/stats.go index f15740cbb..98e614ac6 100644 --- a/index/scorch/stats.go +++ b/index/scorch/stats.go @@ -42,9 +42,9 @@ type Stats struct { TotAnalysisTime uint64 TotIndexTime uint64 - TotBytesReadQueryTime uint64 - TotIndexedPlainTextBytes uint64 - TotIndexedAnalysisBytes uint64 + TotBytesReadAtQueryTime uint64 + TotIndexedPlainTextBytes uint64 + TotBytesIndexedAfterAnalysis uint64 TotTermSearchersStarted uint64 TotTermSearchersFinished uint64 From 05c5ea133401b0b523c57494287db25de5048ed5 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 13 Jul 2022 11:58:35 +0530 Subject: [PATCH 15/16] renaming num_bytes_read_query_time -> num_bytes_read_at_query_time --- index/scorch/scorch.go | 2 +- index_test.go | 26 +++++++++++++------------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/index/scorch/scorch.go b/index/scorch/scorch.go index 1e69946db..534ae2049 100644 --- a/index/scorch/scorch.go +++ b/index/scorch/scorch.go @@ -596,7 +596,7 @@ func (s *Scorch) StatsMap() map[string]interface{} { m["index_time"] = m["TotIndexTime"] m["term_searchers_started"] = m["TotTermSearchersStarted"] m["term_searchers_finished"] = m["TotTermSearchersFinished"] - m["num_bytes_read_query_time"] = m["TotBytesReadAtQueryTime"] + m["num_bytes_read_at_query_time"] = m["TotBytesReadAtQueryTime"] m["num_plain_text_bytes_indexed"] = m["TotIndexedPlainTextBytes"] m["num_bytes_indexed_after_analysis"] = m["TotBytesIndexedAfterAnalysis"] m["num_items_introduced"] = m["TotIntroducedItems"] diff --git a/index_test.go b/index_test.go index 67f86bdec..e5b8c6288 100644 --- a/index_test.go +++ b/index_test.go @@ -290,7 +290,7 @@ func TestBytesRead(t *testing.T) { t.Error(err) } stats, _ := idx.StatsMap()["index"].(map[string]interface{}) - prevBytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + prevBytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) if prevBytesRead != 28618 { t.Fatalf("expected bytes read for query string 28618, got %v", prevBytesRead) @@ -304,7 +304,7 @@ func TestBytesRead(t *testing.T) { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 23 { t.Fatalf("expected bytes read for query string 23, got %v", bytesRead-prevBytesRead) @@ -320,7 +320,7 @@ func TestBytesRead(t *testing.T) { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 16556 { t.Fatalf("expected bytes read for fuzzy query is 16556, got %v\n", bytesRead-prevBytesRead) @@ -337,7 +337,7 @@ func TestBytesRead(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if !approxSame(bytesRead-prevBytesRead, 259) { t.Fatalf("expected bytes read for faceted query is 259, got %v", bytesRead-prevBytesRead) @@ -355,7 +355,7 @@ func TestBytesRead(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 1678 { t.Fatalf("expected bytes read for numeric range query is 1678, got %v", bytesRead-prevBytesRead) @@ -370,7 +370,7 @@ func TestBytesRead(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 676 { t.Fatalf("expected bytes read for query with highlighter is 676, got %v", bytesRead-prevBytesRead) @@ -386,7 +386,7 @@ func TestBytesRead(t *testing.T) { // expectation is that the bytes read is roughly equal to sum of sub queries in // the disjunction query plus sum static value stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 77 { t.Fatalf("expected bytes read for disjunction query is 77, got %v", bytesRead-prevBytesRead) @@ -462,7 +462,7 @@ func TestBytesReadStored(t *testing.T) { } stats, _ := idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ := stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ := stats["num_bytes_read_at_query_time"].(uint64) if bytesRead != 15792 { t.Fatalf("expected the bytes read stat to be around 15792, got %v", err) } @@ -474,7 +474,7 @@ func TestBytesReadStored(t *testing.T) { t.Error(err) } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 15 { t.Fatalf("expected the bytes read stat to be around 15, got %v", err) } @@ -488,7 +488,7 @@ func TestBytesReadStored(t *testing.T) { } stats, _ = idx.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 38278 { t.Fatalf("expected the bytes read stat to be around 38278, got %v", err) @@ -532,7 +532,7 @@ func TestBytesReadStored(t *testing.T) { } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead != 167 { t.Fatalf("expected the bytes read stat to be around 167, got %v", bytesRead-prevBytesRead) } @@ -543,7 +543,7 @@ func TestBytesReadStored(t *testing.T) { t.Error(err) } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 12 { t.Fatalf("expected the bytes read stat to be around 12, got %v", err) } @@ -556,7 +556,7 @@ func TestBytesReadStored(t *testing.T) { } stats, _ = idx1.StatsMap()["index"].(map[string]interface{}) - bytesRead, _ = stats["num_bytes_read_query_time"].(uint64) + bytesRead, _ = stats["num_bytes_read_at_query_time"].(uint64) if bytesRead-prevBytesRead != 646 { t.Fatalf("expected the bytes read stat to be around 646, got %v", err) From 6284a489e7f04929aed42baf5b74b96d11cae899 Mon Sep 17 00:00:00 2001 From: Thejas-bhat Date: Wed, 13 Jul 2022 21:24:05 +0530 Subject: [PATCH 16/16] updated zapx version, code cleanup --- go.mod | 2 +- go.sum | 5 ++--- index/scorch/merge.go | 2 +- index/scorch/snapshot_index.go | 32 +++++++++++++++--------------- index/scorch/snapshot_index_tfr.go | 6 +++--- 5 files changed, 23 insertions(+), 24 deletions(-) diff --git a/go.mod b/go.mod index 1a794a3d1..0cef85862 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( github.com/blevesearch/zapx/v12 v12.3.4 github.com/blevesearch/zapx/v13 v13.3.4 github.com/blevesearch/zapx/v14 v14.3.4 - github.com/blevesearch/zapx/v15 v15.3.4 + github.com/blevesearch/zapx/v15 v15.3.5-0.20220713163830-ae843e553177 github.com/couchbase/moss v0.2.0 github.com/golang/protobuf v1.3.2 github.com/spf13/cobra v0.0.5 diff --git a/go.sum b/go.sum index 38a8fa73c..9e57418ab 100644 --- a/go.sum +++ b/go.sum @@ -20,7 +20,6 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA= github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc= github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs= -github.com/blevesearch/scorch_segment_api/v2 v2.1.0 h1:NFwteOpZEvJk5Vg0H6gD0hxupsG3JYocE4DBvsA2GZI= github.com/blevesearch/scorch_segment_api/v2 v2.1.0/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE= github.com/blevesearch/scorch_segment_api/v2 v2.1.1 h1:J8UDudUpDJz21d/hCMIshCeRordwnDTftgXcSDMUx40= github.com/blevesearch/scorch_segment_api/v2 v2.1.1/go.mod h1:uch7xyyO/Alxkuxa+CGs79vw0QY8BENSBjg6Mw5L5DE= @@ -42,8 +41,8 @@ github.com/blevesearch/zapx/v13 v13.3.4 h1:f646k6300VGRIR7eJ6lLtF8UC95NIWmF899j4 github.com/blevesearch/zapx/v13 v13.3.4/go.mod h1:Wl7hO1gT+IDvJb7i06g2iW5Qvw0KzncJPsBx7WGWhLA= github.com/blevesearch/zapx/v14 v14.3.4 h1:/FVzSGFG5rbVWfPEqlcaJd8lZSJMQpTdmFhz/l2QI7w= github.com/blevesearch/zapx/v14 v14.3.4/go.mod h1:b1YhRXXhAj9i+9aOwhRKCHUmJyYieK/QbDvPJDLddUk= -github.com/blevesearch/zapx/v15 v15.3.4 h1:/y6AOxRuBiZPFAItqcrKcXPPtlAwuW/jMoOFO7tc7rs= -github.com/blevesearch/zapx/v15 v15.3.4/go.mod h1:TQ/qDC2q7TSSpeC6Vgr9fDN56Ra0u49lZJQ4v30WEx4= +github.com/blevesearch/zapx/v15 v15.3.5-0.20220713163830-ae843e553177 h1:0/WYF9nS1HBgDc3z7ePdDz15CwMuNYZ4WeD7Kravm7M= +github.com/blevesearch/zapx/v15 v15.3.5-0.20220713163830-ae843e553177/go.mod h1:ii4ohMQC0TCUjYfq8OtrbABgeI1zljjyXBFpUe/dPDw= github.com/coreos/etcd v3.3.10+incompatible/go.mod h1:uF7uidLiAD3TWHmW31ZFd/JWoc32PjwdhPthX9715RE= github.com/coreos/go-etcd v2.0.0+incompatible/go.mod h1:Jez6KQU2B/sWsbdaef3ED8NzMklzPG4d5KIOhIy30Tk= github.com/coreos/go-semver v0.2.0/go.mod h1:nnelYz7RCh+5ahJtPPxZlU+153eP4D4r3EedlOD2RNk= diff --git a/index/scorch/merge.go b/index/scorch/merge.go index d0b7c5535..b9a00295a 100644 --- a/index/scorch/merge.go +++ b/index/scorch/merge.go @@ -438,7 +438,7 @@ type segmentMerge struct { func cumulateBytesRead(sbs []segment.Segment) uint64 { var rv uint64 for _, seg := range sbs { - if segI, ok := seg.(segment.DiskStatsReporter); ok { + if segI, diskStatsAvailable := seg.(segment.DiskStatsReporter); diskStatsAvailable { rv += segI.BytesRead() } } diff --git a/index/scorch/snapshot_index.go b/index/scorch/snapshot_index.go index 81c91d338..1bd369920 100644 --- a/index/scorch/snapshot_index.go +++ b/index/scorch/snapshot_index.go @@ -149,15 +149,15 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, for index, segment := range i.segment { go func(index int, segment *SegmentSnapshot) { var prevBytesRead uint64 - seg, ok := segment.segment.(diskStatsReporter) - if ok { + seg, diskStatsAvailable := segment.segment.(diskStatsReporter) + if diskStatsAvailable { prevBytesRead = seg.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { results <- &asynchSegmentResult{err: err} } else { - if ok { + if diskStatsAvailable { atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, seg.BytesRead()-prevBytesRead) } @@ -436,8 +436,8 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { rvd := document.NewDocument(id) var prevBytesRead uint64 - seg, ok := i.segment[segmentIndex].segment.(segment.DiskStatsReporter) - if ok { + seg, diskStatsAvailable := i.segment[segmentIndex].segment.(segment.DiskStatsReporter) + if diskStatsAvailable { prevBytesRead = seg.BytesRead() } err = i.segment[segmentIndex].VisitDocument(localDocNum, func(name string, typ byte, val []byte, pos []uint64) bool { @@ -469,7 +469,7 @@ func (i *IndexSnapshot) Document(id string) (rv index.Document, err error) { if err != nil { return nil, err } - if ok { + if diskStatsAvailable { delta := seg.BytesRead() - prevBytesRead atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, delta) } @@ -548,15 +548,15 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.dicts = make([]segment.TermDictionary, len(is.segment)) for i, segment := range is.segment { var prevBytesRead uint64 - segP, ok := segment.segment.(diskStatsReporter) - if ok { + segP, diskStatsAvailable := segment.segment.(diskStatsReporter) + if diskStatsAvailable { prevBytesRead = segP.BytesRead() } dict, err := segment.segment.Dictionary(field) if err != nil { return nil, err } - if ok { + if diskStatsAvailable { atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, segP.BytesRead()-prevBytesRead) } rv.dicts[i] = dict @@ -565,7 +565,7 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, for i, segment := range is.segment { var prevBytesReadPL uint64 - if postings, ok := rv.postings[i].(diskStatsReporter); ok { + if postings, diskStatsAvailable := rv.postings[i].(diskStatsReporter); diskStatsAvailable { prevBytesReadPL = postings.BytesRead() } pl, err := rv.dicts[i].PostingsList(term, segment.deleted, rv.postings[i]) @@ -575,18 +575,18 @@ func (is *IndexSnapshot) TermFieldReader(term []byte, field string, includeFreq, rv.postings[i] = pl var prevBytesReadItr uint64 - if itr, ok := rv.iterators[i].(diskStatsReporter); ok { + if itr, diskStatsAvailable := rv.iterators[i].(diskStatsReporter); diskStatsAvailable { prevBytesReadItr = itr.BytesRead() } rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i]) - if postings, ok := pl.(diskStatsReporter); ok && + if postings, diskStatsAvailable := pl.(diskStatsReporter); diskStatsAvailable && prevBytesReadPL < postings.BytesRead() { atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, postings.BytesRead()-prevBytesReadPL) } - if itr, ok := rv.iterators[i].(diskStatsReporter); ok && + if itr, diskStatsAvailable := rv.iterators[i].(diskStatsReporter); diskStatsAvailable && prevBytesReadItr < itr.BytesRead() { atomic.AddUint64(&is.parent.stats.TotBytesReadAtQueryTime, itr.BytesRead()-prevBytesReadItr) @@ -710,15 +710,15 @@ func (i *IndexSnapshot) documentVisitFieldTermsOnSegment( if ssvOk && ssv != nil && len(vFields) > 0 { var prevBytesRead uint64 - ssvp, ok := ssv.(segment.DiskStatsReporter) - if ok { + ssvp, diskStatsAvailable := ssv.(segment.DiskStatsReporter) + if diskStatsAvailable { prevBytesRead = ssvp.BytesRead() } dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs) if err != nil { return nil, nil, err } - if ok { + if diskStatsAvailable { atomic.AddUint64(&i.parent.stats.TotBytesReadAtQueryTime, ssvp.BytesRead()-prevBytesRead) } } diff --git a/index/scorch/snapshot_index_tfr.go b/index/scorch/snapshot_index_tfr.go index f452a8682..7283b1371 100644 --- a/index/scorch/snapshot_index_tfr.go +++ b/index/scorch/snapshot_index_tfr.go @@ -77,8 +77,8 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // find the next hit for i.segmentOffset < len(i.iterators) { prevBytesRead := uint64(0) - itr, ok := i.iterators[i.segmentOffset].(segment.DiskStatsReporter) - if ok { + itr, diskStatsAvailable := i.iterators[i.segmentOffset].(segment.DiskStatsReporter) + if diskStatsAvailable { prevBytesRead = itr.BytesRead() } next, err := i.iterators[i.segmentOffset].Next() @@ -98,7 +98,7 @@ func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*in // this is because there are chances of having a series of loadChunk calls, // and they have to be added together before sending the bytesRead at this point // upstream. - if ok { + if diskStatsAvailable { delta := itr.BytesRead() - prevBytesRead atomic.AddUint64(&i.snapshot.parent.stats.TotBytesReadAtQueryTime, uint64(delta)) }