Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ethdb: add leveldb write delay statistic #16499

Merged
merged 1 commit into from
Apr 16, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 81 additions & 14 deletions ethdb/database.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package ethdb

import (
"fmt"
"strconv"
"strings"
"sync"
Expand All @@ -32,17 +33,25 @@ import (
"github.com/syndtr/goleveldb/leveldb/util"
)

const (
writeDelayNThreshold = 200
writeDelayThreshold = 350 * time.Millisecond
writeDelayWarningThrottler = 1 * time.Minute
)

var OpenFileLimit = 64

type LDBDatabase struct {
fn string // filename for reporting
db *leveldb.DB // LevelDB instance

compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
compReadMeter metrics.Meter // Meter for measuring the data read during compaction
compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written
compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction
compReadMeter metrics.Meter // Meter for measuring the data read during compaction
compWriteMeter metrics.Meter // Meter for measuring the data written during compaction
writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction
writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction
diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read
diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written

quitLock sync.Mutex // Mutex protecting the quit channel access
quitChan chan chan error // Quit channel to stop the metrics collection before closing the database
Expand Down Expand Up @@ -147,16 +156,17 @@ func (db *LDBDatabase) LDB() *leveldb.DB {

// Meter configures the database metrics collectors and
func (db *LDBDatabase) Meter(prefix string) {
// Short circuit metering if the metrics system is disabled
if !metrics.Enabled {
return
if metrics.Enabled {
// Initialize all the metrics collector at the requested prefix
db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
}
// Initialize all the metrics collector at the requested prefix
db.compTimeMeter = metrics.NewRegisteredMeter(prefix+"compact/time", nil)
db.compReadMeter = metrics.NewRegisteredMeter(prefix+"compact/input", nil)
db.compWriteMeter = metrics.NewRegisteredMeter(prefix+"compact/output", nil)
db.diskReadMeter = metrics.NewRegisteredMeter(prefix+"disk/read", nil)
db.diskWriteMeter = metrics.NewRegisteredMeter(prefix+"disk/write", nil)
// Initialize write delay metrics no matter we are in metric mode or not.
db.writeDelayMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/duration", nil)
db.writeDelayNMeter = metrics.NewRegisteredMeter(prefix+"compact/writedelay/counter", nil)

// Create a quit channel for the periodic collector and run it
db.quitLock.Lock()
Expand All @@ -178,6 +188,9 @@ func (db *LDBDatabase) Meter(prefix string) {
// 2 | 523 | 1000.37159 | 7.26059 | 66.86342 | 66.77884
// 3 | 570 | 1113.18458 | 0.00000 | 0.00000 | 0.00000
//
// This is how the write delay look like (currently):
// DelayN:5 Delay:406.604657ms
//
// This is how the iostats look like (currently):
// Read(MB):3895.04860 Write(MB):3654.64712
func (db *LDBDatabase) meter(refresh time.Duration) {
Expand All @@ -188,6 +201,14 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
}
// Create storage for iostats.
var iostats [2]float64

// Create storage and warning log tracer for write delay.
var (
delaystats [2]int64
lastWriteDelay time.Time
lastWriteDelayN time.Time
)

// Iterate ad infinitum and collect the stats
for i := 1; ; i++ {
// Retrieve the database stats
Expand Down Expand Up @@ -236,6 +257,52 @@ func (db *LDBDatabase) meter(refresh time.Duration) {
db.compWriteMeter.Mark(int64((compactions[i%2][2] - compactions[(i-1)%2][2]) * 1024 * 1024))
}

// Retrieve the write delay statistic
writedelay, err := db.db.GetProperty("leveldb.writedelay")
if err != nil {
db.log.Error("Failed to read database write delay statistic", "err", err)
return
}
var (
delayN int64
delayDuration string
duration time.Duration
)
if n, err := fmt.Sscanf(writedelay, "DelayN:%d Delay:%s", &delayN, &delayDuration); n != 2 || err != nil {
db.log.Error("Write delay statistic not found")
return
}
duration, err = time.ParseDuration(delayDuration)
if err != nil {
db.log.Error("Failed to parse delay duration", "err", err)
return
}
if db.writeDelayNMeter != nil {
db.writeDelayNMeter.Mark(delayN - delaystats[0])
// If the write delay number been collected in the last minute exceeds the predefined threshold,
// print a warning log here.
// If a warning that db performance is laggy has been displayed,
// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
if int(db.writeDelayNMeter.Rate1()) > writeDelayNThreshold &&
time.Now().After(lastWriteDelayN.Add(writeDelayWarningThrottler)) {
db.log.Warn("Write delay number exceeds the threshold (200 per second) in the last minute")
lastWriteDelayN = time.Now()
}
}
if db.writeDelayMeter != nil {
db.writeDelayMeter.Mark(duration.Nanoseconds() - delaystats[1])
// If the write delay duration been collected in the last minute exceeds the predefined threshold,
// print a warning log here.
// If a warning that db performance is laggy has been displayed,
// any subsequent warnings will be withhold for 1 minute to don't overwhelm the user.
if int64(db.writeDelayMeter.Rate1()) > writeDelayThreshold.Nanoseconds() &&
time.Now().After(lastWriteDelay.Add(writeDelayWarningThrottler)) {
db.log.Warn("Write delay duration exceeds the threshold (35% of the time) in the last minute")
lastWriteDelay = time.Now()
}
}
delaystats[0], delaystats[1] = delayN, duration.Nanoseconds()

// Retrieve the database iostats.
ioStats, err := db.db.GetProperty("leveldb.iostats")
if err != nil {
Expand Down