Skip to content

Commit

Permalink
Add ingester_chunks_flush_failures_total
Browse files Browse the repository at this point in the history
This commit adds a new metric to pkg/ingester/metrics.go that counts
the total number of chunk flush failures. Loki operators should create
alerts on this metric: when flushes fail repeatedly, ingesters keep
buffering the unflushed chunks, and prolonged periods of such failures
increase memory pressure on the ingesters.

Signed-off-by: George Robinson <george.robinson@grafana.com>
  • Loading branch information
grobinson-grafana committed May 9, 2024
1 parent 67ed2f7 commit 9d8a623
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/ingester/flush.go
Original file line number Diff line number Diff line change
Expand Up @@ -372,6 +372,7 @@ func (i *Ingester) encodeChunk(ctx context.Context, ch *chunk.Chunk, desc *chunk
// chunk to have another opportunity to be flushed.
func (i *Ingester) flushChunk(ctx context.Context, ch *chunk.Chunk) error {
if err := i.store.Put(ctx, []chunk.Chunk{*ch}); err != nil {
i.metrics.chunksFlushFailures.Inc()
return fmt.Errorf("store put chunk: %w", err)
}
i.metrics.flushedChunksStats.Inc(1)
Expand Down
6 changes: 6 additions & 0 deletions pkg/ingester/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ type ingesterMetrics struct {
chunkSizePerTenant *prometheus.CounterVec
chunkAge prometheus.Histogram
chunkEncodeTime prometheus.Histogram
chunksFlushFailures prometheus.Counter
chunksFlushedPerReason *prometheus.CounterVec
chunkLifespan prometheus.Histogram
flushedChunksStats *analytics.Counter
Expand Down Expand Up @@ -232,6 +233,11 @@ func newIngesterMetrics(r prometheus.Registerer, metricsNamespace string) *inges
// 10ms to 10s.
Buckets: prometheus.ExponentialBuckets(0.01, 4, 6),
}),
chunksFlushFailures: promauto.With(r).NewCounter(prometheus.CounterOpts{
Namespace: constants.Loki,
Name: "ingester_chunks_flush_failures_total",
Help: "Total number of flush failures.",
}),
chunksFlushedPerReason: promauto.With(r).NewCounterVec(prometheus.CounterOpts{
Namespace: constants.Loki,
Name: "ingester_chunks_flushed_total",
Expand Down

0 comments on commit 9d8a623

Please sign in to comment.