Cache overlapping blocks (#2239)
* Caches iteration over overlapping blocks to avoid reprocessing them.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* lint.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Moar tests.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* Final touch.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>

* lint.

Signed-off-by: Cyril Tovena <cyril.tovena@gmail.com>
cyriltovena authored Jun 22, 2020
1 parent 732fe6d commit 6ab832b
Showing 13 changed files with 725 additions and 209 deletions.
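
The core idea of this change: blocks inside a chunk become individually addressable through a new `Block` interface (each block has a unique `Offset()` within its chunk), so work done decoding a block for one query can be cached and reused by later queries that overlap it. A minimal, hypothetical sketch of that caching idea; `blockCache` and `key` are illustrative names, not code from this commit:

```go
package main

import "fmt"

// key identifies a block across queries: the chunk it belongs to plus
// the block's Offset(), which the new interface guarantees is unique
// within a chunk.
type key struct {
	chunkID string
	offset  int
}

// blockCache memoizes the decoded contents of a block so that a later,
// overlapping query does not decompress it again.
type blockCache struct {
	decoded map[key][]string
}

func (c *blockCache) get(k key, decode func() []string) []string {
	if lines, ok := c.decoded[k]; ok {
		return lines // overlapping query: reuse earlier work
	}
	lines := decode() // first query touching this block pays the cost
	c.decoded[k] = lines
	return lines
}

func main() {
	cache := &blockCache{decoded: map[key][]string{}}
	decodes := 0
	decode := func() []string {
		decodes++
		return []string{"line 1", "line 2"}
	}

	k := key{chunkID: "chunk-1", offset: 42}
	cache.get(k, decode)             // first query decompresses the block
	cache.get(k, decode)             // second, overlapping query hits the cache
	fmt.Println("decodes:", decodes) // decodes: 1
}
```

The diffs below add the plumbing that makes this possible: a `Blocks(mintT, maxtT)` accessor, the `Block` interface, and blocks that carry their own reader pool.
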
6 changes: 5 additions & 1 deletion pkg/chunkenc/dumb_chunk.go
```diff
@@ -97,7 +97,11 @@ func (c *dumbChunk) Bytes() ([]byte, error) {
 	return nil, nil
 }
 
-func (c *dumbChunk) Blocks() int {
+func (c *dumbChunk) Blocks(_ time.Time, _ time.Time) []Block {
+	return nil
+}
+
+func (c *dumbChunk) BlockCount() int {
 	return 0
 }
```

18 changes: 17 additions & 1 deletion pkg/chunkenc/interface.go
```diff
@@ -98,11 +98,27 @@ type Chunk interface {
 	SpaceFor(*logproto.Entry) bool
 	Append(*logproto.Entry) error
 	Iterator(ctx context.Context, from, through time.Time, direction logproto.Direction, filter logql.LineFilter) (iter.EntryIterator, error)
+	// Blocks returns the list of blocks in the chunk that overlap the given time range.
+	Blocks(mintT, maxtT time.Time) []Block
 	Size() int
 	Bytes() ([]byte, error)
-	Blocks() int
+	BlockCount() int
 	Utilization() float64
 	UncompressedSize() int
 	CompressedSize() int
 	Close() error
 }
+
+// Block is a chunk block.
+type Block interface {
+	// MinTime is the minimum time of entries in the block.
+	MinTime() int64
+	// MaxTime is the maximum time of entries in the block.
+	MaxTime() int64
+	// Offset is the offset/position of the block in the chunk. Offset is unique for a given block per chunk.
+	Offset() int
+	// Entries is the number of entries in the block.
+	Entries() int
+	// Iterator returns an entry iterator for the block.
+	Iterator(context.Context, logql.LineFilter) iter.EntryIterator
+}
```
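
With the interface above, a caller can ask a chunk for just the blocks that intersect a query window and build one iterator per block. A hedged sketch of such a consumer; `overlappingIterators` is a hypothetical helper, not part of this commit, and assumes it compiles inside package `chunkenc`:

```go
package chunkenc

import (
	"context"
	"time"

	"github.com/grafana/loki/pkg/iter"
	"github.com/grafana/loki/pkg/logql"
)

// overlappingIterators asks the chunk only for blocks that intersect the
// query window and builds one iterator per block. Block.Offset() would be
// the natural key for caching these per-block results across queries.
func overlappingIterators(ctx context.Context, c Chunk, from, through time.Time,
	filter logql.LineFilter) []iter.EntryIterator {
	blocks := c.Blocks(from, through)
	its := make([]iter.EntryIterator, 0, len(blocks))
	for _, b := range blocks {
		its = append(its, b.Iterator(ctx, filter))
	}
	return its
}
```
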
31 changes: 0 additions & 31 deletions pkg/chunkenc/lazy_chunk.go

This file was deleted.

44 changes: 38 additions & 6 deletions pkg/chunkenc/memchunk.go
```diff
@@ -80,6 +80,8 @@ type block struct {
 	offset           int // The offset of the block in the chunk.
 	uncompressedSize int // Total uncompressed size in bytes when the chunk is cut.
+
+	readers ReaderPool
 }
 
 // This block holds the un-compressed entries. Once it has enough data, this is
```

```diff
@@ -212,7 +214,9 @@ func NewByteChunk(b []byte, blockSize, targetSize int) (*MemChunk, error) {
 	bc.blocks = make([]block, 0, num)
 
 	for i := 0; i < num; i++ {
-		blk := block{}
+		blk := block{
+			readers: bc.readers,
+		}
 		// Read #entries.
 		blk.numEntries = db.uvarint()
```
```diff
@@ -339,8 +343,8 @@ func (c *MemChunk) Size() int {
 	return ne
 }
 
-// Blocks implements Chunk.
-func (c *MemChunk) Blocks() int {
+// BlockCount implements Chunk.
+func (c *MemChunk) BlockCount() int {
 	return len(c.blocks)
 }
```

```diff
@@ -431,6 +435,7 @@ func (c *MemChunk) cut() error {
 	}
 
 	c.blocks = append(c.blocks, block{
+		readers:    c.readers,
 		b:          b,
 		numEntries: len(c.head.entries),
 		mint:       c.head.mint,
```
```diff
@@ -477,7 +482,7 @@ func (c *MemChunk) Iterator(ctx context.Context, mintT, maxtT time.Time, direction logproto.Direction, filter logql.LineFilter) (iter.EntryIterator, error) {
 		if maxt < b.mint || b.maxt < mint {
 			continue
 		}
-		its = append(its, b.iterator(ctx, c.readers, filter))
+		its = append(its, b.Iterator(ctx, filter))
 	}
 
 	if !c.head.isEmpty() {
@@ -497,11 +502,38 @@ func (c *MemChunk) Iterator(ctx context.Context, mintT, maxtT time.Time, direction logproto.Direction, filter logql.LineFilter) (iter.EntryIterator, error) {
 	return iter.NewEntryReversedIter(iterForward)
 }
 
-func (b block) iterator(ctx context.Context, pool ReaderPool, filter logql.LineFilter) iter.EntryIterator {
+// Blocks implements Chunk.
+func (c *MemChunk) Blocks(mintT, maxtT time.Time) []Block {
+	mint, maxt := mintT.UnixNano(), maxtT.UnixNano()
+	blocks := make([]Block, 0, len(c.blocks))
+
+	for _, b := range c.blocks {
+		if maxt > b.mint && b.maxt > mint {
+			blocks = append(blocks, b)
+		}
+	}
+	return blocks
+}
+
+func (b block) Iterator(ctx context.Context, filter logql.LineFilter) iter.EntryIterator {
 	if len(b.b) == 0 {
 		return emptyIterator
 	}
-	return newBufferedIterator(ctx, pool, b.b, filter)
+	return newBufferedIterator(ctx, b.readers, b.b, filter)
 }
+
+func (b block) Offset() int {
+	return b.offset
+}
+
+func (b block) Entries() int {
+	return b.numEntries
+}
+func (b block) MinTime() int64 {
+	return b.mint
+}
+func (b block) MaxTime() int64 {
+	return b.maxt
+}
 
 func (hb *headBlock) iterator(ctx context.Context, mint, maxt int64, filter logql.LineFilter) iter.EntryIterator {
```
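
The filter in `Blocks` above keeps a block when `maxt > b.mint && b.maxt > mint`, i.e. when the query window and the block's time span intersect, with both comparisons strict. A standalone check of that predicate, for illustration only:

```go
package main

import "fmt"

// overlaps mirrors the condition in MemChunk.Blocks: keep a block whose
// span (bMint, bMaxt) intersects the query window (mint, maxt), with
// both comparisons strict.
func overlaps(mint, maxt, bMint, bMaxt int64) bool {
	return maxt > bMint && bMaxt > mint
}

func main() {
	// Query window is [10, 20).
	fmt.Println(overlaps(10, 20, 0, 5))   // false: block ends before the window
	fmt.Println(overlaps(10, 20, 5, 15))  // true: overlaps the window start
	fmt.Println(overlaps(10, 20, 12, 18)) // true: block inside the window
	fmt.Println(overlaps(10, 20, 20, 30)) // false: starts exactly at maxt
}
```
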
2 changes: 1 addition & 1 deletion pkg/ingester/stream.go
```diff
@@ -152,7 +152,7 @@ func (s *stream) Push(ctx context.Context, entries []logproto.Entry, synchronize
 	chunk.closed = true
 
 	samplesPerChunk.Observe(float64(chunk.chunk.Size()))
-	blocksPerChunk.Observe(float64(chunk.chunk.Blocks()))
+	blocksPerChunk.Observe(float64(chunk.chunk.BlockCount()))
 	chunksCreatedTotal.Inc()
 
 	s.chunks = append(s.chunks, chunkDesc{
```