Skip to content

Commit 96a016b

Browse files
authored
Returning bytesRead per query as part of searchResult (#142)
* refactoring for bytes read to be returned per query * - fixing the doc values' bytesRead tracking by avoid the double counting of stuff - fixing the double counting of a segment's loading portion of bytes read * scraped the atomics operations for bytesRead tracking. * code cleanup * updating go.mod and go.sum * bug fix: implementing BytesRead for stubDocument
1 parent 3040054 commit 96a016b

9 files changed

+73
-46
lines changed

dict.go

+18
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ type Dictionary struct {
3030
fieldID uint16
3131
fst *vellum.FST
3232
fstReader *vellum.Reader
33+
34+
bytesRead uint64
3335
}
3436

3537
// represents an immutable, empty dictionary
@@ -125,6 +127,22 @@ func (d *Dictionary) AutomatonIterator(a segment.Automaton,
125127
return emptyDictionaryIterator
126128
}
127129

130+
func (d *Dictionary) incrementBytesRead(val uint64) {
131+
d.bytesRead += val
132+
}
133+
134+
func (d *Dictionary) BytesRead() uint64 {
135+
return d.bytesRead
136+
}
137+
138+
func (d *Dictionary) ResetBytesRead(val uint64) {
139+
d.bytesRead = val
140+
}
141+
142+
func (d *Dictionary) BytesWritten() uint64 {
143+
return 0
144+
}
145+
128146
// DictionaryIterator is an iterator for term dictionary
129147
type DictionaryIterator struct {
130148
d *Dictionary

doc_test.go

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ type stubDocument struct {
1212
composite []*stubField
1313
}
1414

15+
func (s *stubDocument) StoredFieldsBytes() uint64 {
16+
return 0
17+
}
18+
1519
func (s *stubDocument) ID() string {
1620
return s.id
1721
}

docvalues.go

+33-21
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ import (
2121
"math"
2222
"reflect"
2323
"sort"
24-
"sync/atomic"
2524

2625
index "github.com/blevesearch/bleve_index_api"
2726
segment "github.com/blevesearch/scorch_segment_api/v2"
@@ -40,6 +39,31 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
4039
type docVisitState struct {
4140
dvrs map[uint16]*docValueReader
4241
segment *SegmentBase
42+
43+
bytesRead uint64
44+
}
45+
46+
// Implements the segment.DiskStatsReporter interface
47+
// The purpose of this implementation is to get
48+
// the bytes read from the disk (pertaining to the
49+
// docvalues) while querying.
50+
// the loadDvChunk retrieves the next chunk of docvalues
51+
// and the bytes retrieved off the disk pertaining to that
52+
// is accounted as well.
53+
func (d *docVisitState) incrementBytesRead(val uint64) {
54+
d.bytesRead += val
55+
}
56+
57+
func (d *docVisitState) BytesRead() uint64 {
58+
return d.bytesRead
59+
}
60+
61+
func (d *docVisitState) BytesWritten() uint64 {
62+
return 0
63+
}
64+
65+
func (d *docVisitState) ResetBytesRead(val uint64) {
66+
d.bytesRead = val
4367
}
4468

4569
type docValueReader struct {
@@ -131,27 +155,12 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
131155
return fdvIter, nil
132156
}
133157

134-
// Implements the segment.DiskStatsReporter interface
135-
// The purpose of this implementation is to get
136-
// the bytes read from the disk (pertaining to the
137-
// docvalues) while querying.
138-
// the loadDvChunk retrieves the next chunk of docvalues
139-
// and the bytes retrieved off the disk pertaining to that
140-
// is accounted as well.
141-
func (di *docValueReader) BytesRead() uint64 {
142-
return atomic.LoadUint64(&di.bytesRead)
143-
}
144-
145-
func (di *docValueReader) ResetBytesRead(val uint64) {
146-
atomic.StoreUint64(&di.bytesRead, val)
158+
func (d *docValueReader) getBytesRead() uint64 {
159+
return d.bytesRead
147160
}
148161

149-
func (di *docValueReader) incrementBytesRead(val uint64) {
150-
atomic.AddUint64(&di.bytesRead, val)
151-
}
152-
153-
func (di *docValueReader) BytesWritten() uint64 {
154-
return 0
162+
func (d *docValueReader) incrementBytesRead(val uint64) {
163+
d.bytesRead += val
155164
}
156165

157166
func (di *docValueReader) loadDvChunk(chunkNumber uint64, s *SegmentBase) error {
@@ -289,6 +298,7 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
289298
if dvs.segment != s {
290299
dvs.segment = s
291300
dvs.dvrs = nil
301+
dvs.bytesRead = 0
292302
}
293303
}
294304

@@ -327,7 +337,9 @@ func (s *SegmentBase) VisitDocValues(localDocNum uint64, fields []string,
327337
if err != nil {
328338
return dvs, err
329339
}
330-
s.incrementBytesRead(dvr.BytesRead())
340+
dvs.ResetBytesRead(dvr.getBytesRead())
341+
} else {
342+
dvs.ResetBytesRead(0)
331343
}
332344

333345
_ = dvr.visitDocValues(localDocNum, visitor)

go.mod

+2-2
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ go 1.18
44

55
require (
66
github.com/RoaringBitmap/roaring v0.9.4
7-
github.com/blevesearch/bleve_index_api v1.0.4
7+
github.com/blevesearch/bleve_index_api v1.0.5
88
github.com/blevesearch/mmap-go v1.0.4
9-
github.com/blevesearch/scorch_segment_api/v2 v2.1.3
9+
github.com/blevesearch/scorch_segment_api/v2 v2.1.4
1010
github.com/blevesearch/vellum v1.0.9
1111
github.com/golang/snappy v0.0.1
1212
github.com/spf13/cobra v1.4.0

go.sum

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,12 @@ github.com/RoaringBitmap/roaring v0.9.4 h1:ckvZSX5gwCRaJYBNe7syNawCU5oruY9gQmjXl
22
github.com/RoaringBitmap/roaring v0.9.4/go.mod h1:icnadbWcNyfEHlYdr+tDlOTih1Bf/h+rzPpv4sbomAA=
33
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
44
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
5-
github.com/blevesearch/bleve_index_api v1.0.4 h1:mtlzsyJjMIlDngqqB1mq8kPryUMIuEVVbRbJHOWEexU=
6-
github.com/blevesearch/bleve_index_api v1.0.4/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms=
5+
github.com/blevesearch/bleve_index_api v1.0.5 h1:Lc986kpC4Z0/n1g3gg8ul7H+lxgOQPcXb9SxvQGu+tw=
6+
github.com/blevesearch/bleve_index_api v1.0.5/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms=
77
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
88
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
9-
github.com/blevesearch/scorch_segment_api/v2 v2.1.3 h1:2UzpR2dR5DvSZk8tVJkcQ7D5xhoK/UBelYw8ttBHrRQ=
10-
github.com/blevesearch/scorch_segment_api/v2 v2.1.3/go.mod h1:eZrfp1y+lUh+DzFjUcTBUSnKGuunyFIpBIvqYVzJfvc=
9+
github.com/blevesearch/scorch_segment_api/v2 v2.1.4 h1:LmGmo5twU3gV+natJbKmOktS9eMhokPGKWuR+jX84vk=
10+
github.com/blevesearch/scorch_segment_api/v2 v2.1.4/go.mod h1:PgVnbbg/t1UkgezPDu8EHLi1BHQ17xUwsFdU6NnOYS0=
1111
github.com/blevesearch/vellum v1.0.9 h1:PL+NWVk3dDGPCV0hoDu9XLLJgqU4E5s/dOeEJByQ2uQ=
1212
github.com/blevesearch/vellum v1.0.9/go.mod h1:ul1oT0FhSMDIExNjIxHqJoGpVrBpKCdgDQNxfqgJt7k=
1313
github.com/cpuguy83/go-md2man/v2 v2.0.1/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=

intDecoder.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ package zap
1717
import (
1818
"encoding/binary"
1919
"fmt"
20-
"sync/atomic"
2120
)
2221

2322
type chunkedIntDecoder struct {
@@ -59,7 +58,7 @@ func newChunkedIntDecoder(buf []byte, offset uint64, rv *chunkedIntDecoder) *chu
5958
rv.chunkOffsets[i], read = binary.Uvarint(buf[offset+n : offset+n+binary.MaxVarintLen64])
6059
n += uint64(read)
6160
}
62-
atomic.AddUint64(&rv.bytesRead, n)
61+
rv.bytesRead += n
6362
rv.dataStartOffset = offset + n
6463
return rv
6564
}
@@ -70,7 +69,7 @@ func newChunkedIntDecoder(buf []byte, offset uint64, rv *chunkedIntDecoder) *chu
7069
// the loadChunk retrieves the next chunk and the
7170
// number of bytes retrieve in that operation is accounted
7271
func (d *chunkedIntDecoder) getBytesRead() uint64 {
73-
return atomic.LoadUint64(&d.bytesRead)
72+
return d.bytesRead
7473
}
7574

7675
func (d *chunkedIntDecoder) loadChunk(chunk int) error {
@@ -89,7 +88,7 @@ func (d *chunkedIntDecoder) loadChunk(chunk int) error {
8988
start += s
9089
end += e
9190
d.curChunkBytes = d.data[start:end]
92-
atomic.AddUint64(&d.bytesRead, uint64(len(d.curChunkBytes)))
91+
d.bytesRead += uint64(len(d.curChunkBytes))
9392
if d.r == nil {
9493
d.r = newMemUvarintReader(d.curChunkBytes)
9594
} else {
@@ -104,7 +103,7 @@ func (d *chunkedIntDecoder) reset() {
104103
d.dataStartOffset = 0
105104
d.chunkOffsets = d.chunkOffsets[:0]
106105
d.curChunkBytes = d.curChunkBytes[:0]
107-
atomic.StoreUint64(&d.bytesRead, 0)
106+
d.bytesRead = 0
108107
d.data = d.data[:0]
109108
if d.r != nil {
110109
d.r.Reset([]byte(nil))

posting.go

+6-7
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ import (
1919
"fmt"
2020
"math"
2121
"reflect"
22-
"sync/atomic"
2322

2423
"github.com/RoaringBitmap/roaring"
2524
segment "github.com/blevesearch/scorch_segment_api/v2"
@@ -255,15 +254,15 @@ func (p *PostingsList) Count() uint64 {
255254
// the bytes read from the postings lists stored
256255
// on disk, while querying
257256
func (p *PostingsList) ResetBytesRead(val uint64) {
258-
atomic.StoreUint64(&p.bytesRead, val)
257+
p.bytesRead = val
259258
}
260259

261260
func (p *PostingsList) BytesRead() uint64 {
262-
return atomic.LoadUint64(&p.bytesRead)
261+
return p.bytesRead
263262
}
264263

265264
func (p *PostingsList) incrementBytesRead(val uint64) {
266-
atomic.AddUint64(&p.bytesRead, val)
265+
p.bytesRead += val
267266
}
268267

269268
func (p *PostingsList) BytesWritten() uint64 {
@@ -368,15 +367,15 @@ func (i *PostingsIterator) Size() int {
368367
// the freqNorm and location specific information
369368
// of a hit
370369
func (i *PostingsIterator) ResetBytesRead(val uint64) {
371-
atomic.StoreUint64(&i.bytesRead, val)
370+
i.bytesRead = val
372371
}
373372

374373
func (i *PostingsIterator) BytesRead() uint64 {
375-
return atomic.LoadUint64(&i.bytesRead)
374+
return i.bytesRead
376375
}
377376

378377
func (i *PostingsIterator) incrementBytesRead(val uint64) {
379-
atomic.AddUint64(&i.bytesRead, val)
378+
i.bytesRead += val
380379
}
381380

382381
func (i *PostingsIterator) BytesWritten() uint64 {

read.go

-4
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@ func (s *SegmentBase) getDocStoredMetaAndCompressed(docNum uint64) ([]byte, []by
2222
meta := s.mem[storedOffset+n : storedOffset+n+metaLen]
2323
data := s.mem[storedOffset+n+metaLen : storedOffset+n+metaLen+dataLen]
2424

25-
s.incrementBytesRead(metaLen + dataLen)
26-
2725
return meta, data
2826
}
2927

@@ -41,7 +39,5 @@ func (s *SegmentBase) getDocStoredOffsets(docNum uint64) (
4139
dataLen, read := binary.Uvarint(s.mem[storedOffset+n : storedOffset+n+binary.MaxVarintLen64])
4240
n += uint64(read)
4341

44-
s.incrementBytesRead(n)
45-
4642
return indexOffset, storedOffset, n, metaLen, dataLen
4743
}

segment.go

+2-3
Original file line numberDiff line numberDiff line change
@@ -232,8 +232,7 @@ func (s *Segment) ResetBytesRead(val uint64) {
232232
}
233233

234234
func (s *Segment) BytesRead() uint64 {
235-
return atomic.LoadUint64(&s.bytesRead) +
236-
atomic.LoadUint64(&s.SegmentBase.bytesRead)
235+
return atomic.LoadUint64(&s.bytesRead)
237236
}
238237

239238
func (s *Segment) BytesWritten() uint64 {
@@ -321,7 +320,7 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
321320
// read the length of the vellum data
322321
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
323322
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
324-
sb.incrementBytesRead(uint64(read) + vellumLen)
323+
rv.incrementBytesRead(uint64(read) + vellumLen)
325324
rv.fst, err = vellum.Load(fstBytes)
326325
if err != nil {
327326
sb.m.Unlock()

0 commit comments

Comments
 (0)