Skip to content

Commit

Permalink
rpc: measure the amount of time spent sleeping in retry backoff
Browse files Browse the repository at this point in the history
This information is useful for understanding whether the HBase cluster is
overloaded (CallQueueTooBigException) and for tracking the impact of that
overload on gohbase latency.
  • Loading branch information
dethi committed Oct 22, 2024
1 parent 1b7b7bd commit 88b6aaa
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 0 deletions.
12 changes: 12 additions & 0 deletions prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,16 @@ var (
Name: "regions_total",
Help: "Total number of regions in the cache",
})

// retryBackoffDuration is a histogram of retry backoff durations, in seconds.
// sleepAndIncreaseBackoff records one observation per completed backoff sleep.
retryBackoffDuration = promauto.NewHistogram(prometheus.HistogramOpts{
Namespace: "gohbase",
Subsystem: "retry",
Name: "backoff_duration_seconds",
Help: "Time spend sleeping in retry backoff",
// Bucket upper bounds match the exact backoff values generated by the
// sleepAndIncreaseBackoff function: they double from 16ms up to 8.192s and
// then grow in 5s steps up to 33.192s. Keep this list in sync with that
// function's formula whenever it changes.
Buckets: []float64{
0.016, 0.032, 0.064, 0.128, 0.256, 0.512, 1.024, 2.048,
4.096, 8.192, 13.192, 18.192, 23.192, 28.192, 33.192,
},
})
)
5 changes: 5 additions & 0 deletions rpc.go
Original file line number Diff line number Diff line change
Expand Up @@ -1004,6 +1004,11 @@ func sleepAndIncreaseBackoff(ctx context.Context, backoff time.Duration) (time.D
return 0, ctx.Err()
}

// Keep track of the amount of time spent sleeping in retry backoff. Nothing is recorded when
// the context was canceled, because that case has already returned above.
retryBackoffDuration.Observe(backoff.Seconds())

// When changing this formula, update the buckets of the retryBackoffDuration metric too.
if backoff < 5*time.Second {
return backoff * 2, nil
} else if backoff < 30*time.Second {
Expand Down

0 comments on commit 88b6aaa

Please sign in to comment.