diff --git a/cmd/XDC/chaincmd.go b/cmd/XDC/chaincmd.go index 5f12048f2cac..aa050dfd3c29 100644 --- a/cmd/XDC/chaincmd.go +++ b/cmd/XDC/chaincmd.go @@ -35,7 +35,6 @@ import ( "github.com/XinFinOrg/XDPoSChain/eth/downloader" "github.com/XinFinOrg/XDPoSChain/event" "github.com/XinFinOrg/XDPoSChain/log" - "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/urfave/cli/v2" ) @@ -71,6 +70,18 @@ It expects the genesis file as argument.`, utils.GCModeFlag, utils.CacheDatabaseFlag, utils.CacheGCFlag, + utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, + utils.MetricsEnableInfluxDBFlag, + utils.MetricsEnableInfluxDBV2Flag, + utils.MetricsInfluxDBEndpointFlag, + utils.MetricsInfluxDBDatabaseFlag, + utils.MetricsInfluxDBUsernameFlag, + utils.MetricsInfluxDBPasswordFlag, + utils.MetricsInfluxDBTagsFlag, + utils.MetricsInfluxDBTokenFlag, + utils.MetricsInfluxDBBucketFlag, + utils.MetricsInfluxDBOrganizationFlag, }, Description: ` The import command imports blocks from an RLP-encoded form. The form can be one file @@ -227,14 +238,12 @@ func importChain(ctx *cli.Context) error { if ctx.Args().Len() < 1 { utils.Fatalf("This command requires an argument.") } - // Start metrics export if enabled - utils.SetupMetrics(ctx) - // Start system runtime metrics collection - go metrics.CollectProcessMetrics(3 * time.Second) - - stack, _ := makeFullNode(ctx) + stack, cfg := makeFullNode(ctx) defer stack.Close() + // Start metrics export if enabled + utils.SetupMetrics(&cfg.Metrics) + chain, chainDb := utils.MakeChain(ctx, stack) defer chainDb.Close() diff --git a/cmd/XDC/config.go b/cmd/XDC/config.go index d5e60c932180..ef0dbbb00df1 100644 --- a/cmd/XDC/config.go +++ b/cmd/XDC/config.go @@ -32,6 +32,8 @@ import ( "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/eth/ethconfig" "github.com/XinFinOrg/XDPoSChain/internal/flags" + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/node" "github.com/XinFinOrg/XDPoSChain/params" "github.com/naoina/toml" @@ -90,6 +92,7 @@ type XDCConfig struct { Eth ethconfig.Config Node node.Config Ethstats ethstatsConfig + Metrics metrics.Config XDCX XDCx.Config Account account StakeEnable bool @@ -122,12 +125,14 @@ func defaultNodeConfig() node.Config { return cfg } +// makeConfigNode loads geth configuration and creates a blank node instance. func makeConfigNode(ctx *cli.Context) (*node.Node, XDCConfig) { // Load defaults. cfg := XDCConfig{ Eth: ethconfig.Defaults, XDCX: XDCx.DefaultConfig, Node: defaultNodeConfig(), + Metrics: metrics.DefaultConfig, StakeEnable: true, Verbosity: 3, NAT: "", @@ -208,6 +213,9 @@ func makeConfigNode(ctx *cli.Context) (*node.Node, XDCConfig) { } utils.SetXDCXConfig(ctx, &cfg.XDCX, cfg.Node.DataDir) + + applyMetricConfig(ctx, &cfg) + return stack, cfg } @@ -227,10 +235,13 @@ func applyValues(values []string, params *[]string) { func makeFullNode(ctx *cli.Context) (*node.Node, XDCConfig) { stack, cfg := makeConfigNode(ctx) + // Start metrics export if enabled + utils.SetupMetrics(&cfg.Metrics) + // Register XDCX's OrderBook service if requested. // enable in default utils.RegisterXDCXService(stack, &cfg.XDCX) - utils.RegisterEthService(stack, &cfg.Eth) + utils.RegisterEthService(stack, &cfg.Eth, cfg.Node.Version) // Add the Ethereum Stats daemon if requested. if cfg.Ethstats.URL != "" { @@ -258,3 +269,69 @@ func dumpConfig(ctx *cli.Context) error { os.Stdout.Write(out) return nil } + +func applyMetricConfig(ctx *cli.Context, cfg *XDCConfig) { + if ctx.IsSet(utils.MetricsEnabledFlag.Name) { + cfg.Metrics.Enabled = ctx.Bool(utils.MetricsEnabledFlag.Name) + } + if ctx.IsSet(utils.MetricsEnabledExpensiveFlag.Name) { + log.Warn("Expensive metrics are collected by default, please remove this flag", "flag", utils.MetricsEnabledExpensiveFlag.Name) + } + if ctx.IsSet(utils.MetricsHTTPFlag.Name) { + cfg.Metrics.HTTP = ctx.String(utils.MetricsHTTPFlag.Name) + } + if ctx.IsSet(utils.MetricsPortFlag.Name) { + cfg.Metrics.Port = ctx.Int(utils.MetricsPortFlag.Name) + } + if ctx.IsSet(utils.MetricsEnableInfluxDBFlag.Name) { + cfg.Metrics.EnableInfluxDB = ctx.Bool(utils.MetricsEnableInfluxDBFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBEndpointFlag.Name) { + cfg.Metrics.InfluxDBEndpoint = ctx.String(utils.MetricsInfluxDBEndpointFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBDatabaseFlag.Name) { + cfg.Metrics.InfluxDBDatabase = ctx.String(utils.MetricsInfluxDBDatabaseFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBUsernameFlag.Name) { + cfg.Metrics.InfluxDBUsername = ctx.String(utils.MetricsInfluxDBUsernameFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBPasswordFlag.Name) { + cfg.Metrics.InfluxDBPassword = ctx.String(utils.MetricsInfluxDBPasswordFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBTagsFlag.Name) { + cfg.Metrics.InfluxDBTags = ctx.String(utils.MetricsInfluxDBTagsFlag.Name) + } + if ctx.IsSet(utils.MetricsEnableInfluxDBV2Flag.Name) { + cfg.Metrics.EnableInfluxDBV2 = ctx.Bool(utils.MetricsEnableInfluxDBV2Flag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBTokenFlag.Name) { + cfg.Metrics.InfluxDBToken = ctx.String(utils.MetricsInfluxDBTokenFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBBucketFlag.Name) { + cfg.Metrics.InfluxDBBucket = ctx.String(utils.MetricsInfluxDBBucketFlag.Name) + } + if ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) { + cfg.Metrics.InfluxDBOrganization = ctx.String(utils.MetricsInfluxDBOrganizationFlag.Name) + } + // Sanity-check the commandline flags. It is fine if some unused fields is part + // of the toml-config, but we expect the commandline to only contain relevant + // arguments, otherwise it indicates an error. + var ( + enableExport = ctx.Bool(utils.MetricsEnableInfluxDBFlag.Name) + enableExportV2 = ctx.Bool(utils.MetricsEnableInfluxDBV2Flag.Name) + ) + if enableExport || enableExportV2 { + v1FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBUsernameFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBPasswordFlag.Name) + + v2FlagIsSet := ctx.IsSet(utils.MetricsInfluxDBTokenFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBOrganizationFlag.Name) || + ctx.IsSet(utils.MetricsInfluxDBBucketFlag.Name) + + if enableExport && v2FlagIsSet { + utils.Fatalf("Flags --influxdb-metrics.organization, --influxdb-metrics.token, --influxdb-metrics.bucket are only available for influxdb-v2") + } else if enableExportV2 && v1FlagIsSet { + utils.Fatalf("Flags --influxdb-metrics.username, --influxdb-metrics.password are only available for influxdb-v1") + } + } +} diff --git a/cmd/XDC/main.go b/cmd/XDC/main.go index 5ececb431df7..f3b9bdba139e 100644 --- a/cmd/XDC/main.go +++ b/cmd/XDC/main.go @@ -126,9 +126,6 @@ var ( utils.HTTPCORSDomainFlag, utils.HTTPVirtualHostsFlag, utils.EthStatsURLFlag, - utils.MetricsEnabledFlag, - utils.MetricsHTTPFlag, - utils.MetricsPortFlag, //utils.FakePoWFlag, //utils.NoCompactionFlag, //utils.GpoBlocksFlag, @@ -163,6 +160,23 @@ var ( utils.IPCPathFlag, utils.RPCGlobalTxFeeCap, } + + metricsFlags = []cli.Flag{ + utils.MetricsEnabledFlag, + utils.MetricsEnabledExpensiveFlag, + utils.MetricsHTTPFlag, + utils.MetricsPortFlag, + utils.MetricsEnableInfluxDBFlag, + utils.MetricsInfluxDBEndpointFlag, + utils.MetricsInfluxDBDatabaseFlag, + utils.MetricsInfluxDBUsernameFlag, + utils.MetricsInfluxDBPasswordFlag, + utils.MetricsInfluxDBTagsFlag, + utils.MetricsEnableInfluxDBV2Flag, + utils.MetricsInfluxDBTokenFlag, + utils.MetricsInfluxDBBucketFlag, + utils.MetricsInfluxDBOrganizationFlag, + } ) func init() { @@ -194,6 +208,7 @@ func init() { app.Flags = append(app.Flags, rpcFlags...) app.Flags = append(app.Flags, consoleFlags...) app.Flags = append(app.Flags, debug.Flags...) + app.Flags = append(app.Flags, metricsFlags...) flags.AutoEnvVars(app.Flags, "XDC") app.Before = func(ctx *cli.Context) error { @@ -204,6 +219,9 @@ func init() { } flags.CheckEnvVars(ctx, app.Flags, "XDC") + // Start system runtime metrics collection + go metrics.CollectProcessMetrics(3 * time.Second) + utils.SetupNetwork(ctx) return nil } @@ -303,11 +321,6 @@ func startNode(ctx *cli.Context, stack *node.Node, cfg XDCConfig) { if ctx.Bool(utils.LightModeFlag.Name) || ctx.String(utils.SyncModeFlag.Name) == "light" { utils.Fatalf("Light clients do not support staking") } - // Start metrics export if enabled - utils.SetupMetrics(ctx) - - // Start system runtime metrics collection - go metrics.CollectProcessMetrics(3 * time.Second) var ethereum *eth.Ethereum if err := stack.Service(ðereum); err != nil { diff --git a/cmd/utils/flags.go b/cmd/utils/flags.go index 0efc867a273e..18ce92a23fc8 100644 --- a/cmd/utils/flags.go +++ b/cmd/utils/flags.go @@ -22,12 +22,14 @@ import ( "fmt" "math" "math/big" + "net" "os" "path/filepath" "runtime" godebug "runtime/debug" "strconv" "strings" + "time" "github.com/XinFinOrg/XDPoSChain/XDCx" "github.com/XinFinOrg/XDPoSChain/accounts" @@ -52,6 +54,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/metrics/exp" + "github.com/XinFinOrg/XDPoSChain/metrics/influxdb" "github.com/XinFinOrg/XDPoSChain/node" "github.com/XinFinOrg/XDPoSChain/p2p" "github.com/XinFinOrg/XDPoSChain/p2p/discover" @@ -642,7 +645,6 @@ var ( Usage: "Enable metrics collection and reporting", Category: flags.MetricsCategory, } - // MetricsHTTPFlag defines the endpoint for a stand-alone metrics HTTP endpoint. // Since the pprof service enables sensitive/vulnerable behavior, this allows a user // to enable a public-OK metrics endpoint without having to worry about ALSO exposing @@ -661,6 +663,69 @@ var ( Value: metrics.DefaultConfig.Port, Category: flags.MetricsCategory, } + MetricsEnableInfluxDBFlag = &cli.BoolFlag{ + Name: "metrics-influxdb", + Usage: "Enable metrics export/push to an external InfluxDB database", + Category: flags.MetricsCategory, + } + MetricsInfluxDBEndpointFlag = &cli.StringFlag{ + Name: "metrics-influxdb.endpoint", + Usage: "InfluxDB API endpoint to report metrics to", + Value: metrics.DefaultConfig.InfluxDBEndpoint, + Category: flags.MetricsCategory, + } + MetricsInfluxDBDatabaseFlag = &cli.StringFlag{ + Name: "metrics-influxdb.database", + Usage: "InfluxDB database name to push reported metrics to", + Value: metrics.DefaultConfig.InfluxDBDatabase, + Category: flags.MetricsCategory, + } + MetricsInfluxDBUsernameFlag = &cli.StringFlag{ + Name: "metrics-influxdb.username", + Usage: "Username to authorize access to the database", + Value: metrics.DefaultConfig.InfluxDBUsername, + Category: flags.MetricsCategory, + } + MetricsInfluxDBPasswordFlag = &cli.StringFlag{ + Name: "metrics-influxdb.password", + Usage: "Password to authorize access to the database", + Value: metrics.DefaultConfig.InfluxDBPassword, + Category: flags.MetricsCategory, + } + // Tags are part of every measurement sent to InfluxDB. Queries on tags are faster in InfluxDB. + // For example `host` tag could be used so that we can group all nodes and average a measurement + // across all of them, but also so that we can select a specific node and inspect its measurements. + // https://docs.influxdata.com/influxdb/v1.4/concepts/key_concepts/#tag-key + MetricsInfluxDBTagsFlag = &cli.StringFlag{ + Name: "metrics-influxdb.tags", + Usage: "Comma-separated InfluxDB tags (key/values) attached to all measurements", + Value: metrics.DefaultConfig.InfluxDBTags, + Category: flags.MetricsCategory, + } + + MetricsEnableInfluxDBV2Flag = &cli.BoolFlag{ + Name: "metrics-influxdbv2", + Usage: "Enable metrics export/push to an external InfluxDB v2 database", + Category: flags.MetricsCategory, + } + MetricsInfluxDBTokenFlag = &cli.StringFlag{ + Name: "metrics-influxdb.token", + Usage: "Token to authorize access to the database (v2 only)", + Value: metrics.DefaultConfig.InfluxDBToken, + Category: flags.MetricsCategory, + } + MetricsInfluxDBBucketFlag = &cli.StringFlag{ + Name: "metrics-influxdb.bucket", + Usage: "InfluxDB bucket name to push reported metrics to (v2 only)", + Value: metrics.DefaultConfig.InfluxDBBucket, + Category: flags.MetricsCategory, + } + MetricsInfluxDBOrganizationFlag = &cli.StringFlag{ + Name: "metrics-influxdb.organization", + Usage: "InfluxDB organization name (v2 only)", + Value: metrics.DefaultConfig.InfluxDBOrganization, + Category: flags.MetricsCategory, + } // MISC settings RollbackFlag = &cli.StringFlag{ @@ -1468,6 +1533,73 @@ func SetupNetwork(ctx *cli.Context) { params.TargetGasLimit = ctx.Uint64(MinerGasLimitFlag.Name) } +// SetupMetrics configures the metrics system. +func SetupMetrics(cfg *metrics.Config) { + if !cfg.Enabled { + return + } + log.Info("Enabling metrics collection") + metrics.Enable() + + // InfluxDB exporter. + var ( + enableExport = cfg.EnableInfluxDB + enableExportV2 = cfg.EnableInfluxDBV2 + ) + if cfg.EnableInfluxDB && cfg.EnableInfluxDBV2 { + Fatalf("Flags %v can't be used at the same time", strings.Join([]string{MetricsEnableInfluxDBFlag.Name, MetricsEnableInfluxDBV2Flag.Name}, ", ")) + } + var ( + endpoint = cfg.InfluxDBEndpoint + database = cfg.InfluxDBDatabase + username = cfg.InfluxDBUsername + password = cfg.InfluxDBPassword + + token = cfg.InfluxDBToken + bucket = cfg.InfluxDBBucket + organization = cfg.InfluxDBOrganization + tagsMap = SplitTagsFlag(cfg.InfluxDBTags) + ) + if enableExport { + log.Info("Enabling metrics export to InfluxDB") + go influxdb.InfluxDBWithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, database, username, password, "geth.", tagsMap) + } else if enableExportV2 { + tagsMap := SplitTagsFlag(cfg.InfluxDBTags) + log.Info("Enabling metrics export to InfluxDB (v2)") + go influxdb.InfluxDBV2WithTags(metrics.DefaultRegistry, 10*time.Second, endpoint, token, bucket, organization, "geth.", tagsMap) + } + + // Expvar exporter. + if cfg.HTTP != "" { + address := net.JoinHostPort(cfg.HTTP, fmt.Sprintf("%d", cfg.Port)) + log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) + exp.Setup(address) + } else if cfg.HTTP == "" && cfg.Port != 0 { + log.Warn(fmt.Sprintf("--%s specified without --%s, metrics server will not start.", MetricsPortFlag.Name, MetricsHTTPFlag.Name)) + } + + // Enable system metrics collection. + go metrics.CollectProcessMetrics(3 * time.Second) +} + +// SplitTagsFlag parses a comma-separated list of k=v metrics tags. +func SplitTagsFlag(tagsFlag string) map[string]string { + tags := strings.Split(tagsFlag, ",") + tagsMap := map[string]string{} + + for _, t := range tags { + if t != "" { + kv := strings.Split(t, "=") + + if len(kv) == 2 { + tagsMap[kv[0]] = kv[1] + } + } + } + + return tagsMap +} + // MakeChainDatabase open an LevelDB using the flags passed to the client and will hard crash if it fails. func MakeChainDatabase(ctx *cli.Context, stack *node.Node) ethdb.Database { var ( @@ -1595,15 +1727,3 @@ func RegisterFilterAPI(stack *node.Node, backend ethapi.Backend, ethcfg *ethconf }}) return filterSystem } - -func SetupMetrics(ctx *cli.Context) { - if metrics.Enabled { - log.Info("Enabling metrics collection") - - if ctx.IsSet(MetricsHTTPFlag.Name) { - address := fmt.Sprintf("%s:%d", ctx.String(MetricsHTTPFlag.Name), ctx.Int(MetricsPortFlag.Name)) - log.Info("Enabling stand-alone metrics HTTP endpoint", "address", address) - exp.Setup(address) - } - } -} diff --git a/cmd/utils/flags_legacy.go b/cmd/utils/flags_legacy.go index b47a74595613..afd1e0e8bba8 100644 --- a/cmd/utils/flags_legacy.go +++ b/cmd/utils/flags_legacy.go @@ -50,7 +50,7 @@ var ( Usage: "Enable light client mode", Category: flags.DeprecatedCategory, } - // (Deprecated May 2020, shown in aliased flags section) + // Deprecated May 2020, shown in aliased flags section NoUSBFlag = &cli.BoolFlag{ Name: "nousb", Usage: "Disables monitoring for and managing USB hardware wallets (deprecated)", @@ -68,6 +68,12 @@ var ( Usage: "Prepends log messages with call-site location (deprecated)", Category: flags.DeprecatedCategory, } + // Deprecated February 2024 + MetricsEnabledExpensiveFlag = &cli.BoolFlag{ + Name: "metrics-expensive", + Usage: "Enable expensive metrics collection and reporting (deprecated)", + Category: flags.DeprecatedCategory, + } ) // showDeprecated displays deprecated flags that will be soon removed from the codebase. diff --git a/cmd/utils/flags_test.go b/cmd/utils/flags_test.go index adbf25b45bf6..728cbb707014 100644 --- a/cmd/utils/flags_test.go +++ b/cmd/utils/flags_test.go @@ -1,3 +1,20 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of go-ethereum. +// +// go-ethereum is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// go-ethereum is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with go-ethereum. If not, see . + +// Package utils contains internal helper functions for go-ethereum commands. package utils import ( @@ -8,6 +25,49 @@ import ( "testing" ) +func Test_SplitTagsFlag(t *testing.T) { + t.Parallel() + tests := []struct { + name string + args string + want map[string]string + }{ + { + "2 tags case", + "host=localhost,bzzkey=123", + map[string]string{ + "host": "localhost", + "bzzkey": "123", + }, + }, + { + "1 tag case", + "host=localhost123", + map[string]string{ + "host": "localhost123", + }, + }, + { + "empty case", + "", + map[string]string{}, + }, + { + "garbage", + "smth=smthelse=123", + map[string]string{}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + t.Parallel() + if got := SplitTagsFlag(tt.args); !reflect.DeepEqual(got, tt.want) { + t.Errorf("splitTagsFlag() = %v, want %v", got, tt.want) + } + }) + } +} + func TestWalkMatch(t *testing.T) { type args struct { root string diff --git a/cmd/utils/utils.go b/cmd/utils/utils.go index bc676c7b08ff..5afa50c32bbe 100644 --- a/cmd/utils/utils.go +++ b/cmd/utils/utils.go @@ -1,6 +1,10 @@ package utils import ( + "fmt" + "runtime" + "strings" + "github.com/XinFinOrg/XDPoSChain/XDCx" "github.com/XinFinOrg/XDPoSChain/XDCxlending" "github.com/XinFinOrg/XDPoSChain/eth" @@ -8,11 +12,12 @@ import ( "github.com/XinFinOrg/XDPoSChain/eth/ethconfig" "github.com/XinFinOrg/XDPoSChain/ethstats" "github.com/XinFinOrg/XDPoSChain/les" + "github.com/XinFinOrg/XDPoSChain/metrics" "github.com/XinFinOrg/XDPoSChain/node" ) // RegisterEthService adds an Ethereum client to the stack. -func RegisterEthService(stack *node.Node, cfg *ethconfig.Config) { +func RegisterEthService(stack *node.Node, cfg *ethconfig.Config, version string) { var err error if cfg.SyncMode == downloader.LightSync { err = stack.Register(func(ctx *node.ServiceContext) (node.Service, error) { @@ -29,6 +34,21 @@ func RegisterEthService(stack *node.Node, cfg *ethconfig.Config) { ls, _ := les.NewLesServer(fullNode, cfg) fullNode.AddLesServer(ls) } + + // TODO: move the following code to function makeFullNode + // Ref: #21105, #22641, #23761, #24877 + // Create gauge with geth system and build information + var protos []string + for _, p := range fullNode.Protocols() { + protos = append(protos, fmt.Sprintf("%v/%d", p.Name, p.Version)) + } + metrics.NewRegisteredGaugeInfo("xdc/info", nil).Update(metrics.GaugeInfoValue{ + "arch": runtime.GOARCH, + "os": runtime.GOOS, + "version": version, // cfg.Node.Version + "eth_protocols": strings.Join(protos, ","), + }) + return fullNode, err }) } diff --git a/consensus/ethash/ethash.go b/consensus/ethash/ethash.go index a65468f57103..394d495a07e8 100644 --- a/consensus/ethash/ethash.go +++ b/consensus/ethash/ethash.go @@ -401,7 +401,7 @@ type Ethash struct { rand *rand.Rand // Properly seeded random source for nonces threads int // Number of threads to mine on if mining update chan struct{} // Notification channel to update mining parameters - hashrate metrics.Meter // Meter tracking the average hashrate + hashrate *metrics.Meter // Meter tracking the average hashrate // The fields below are hooks for testing shared *Ethash // Shared PoW verifier to avoid cache regeneration @@ -562,7 +562,7 @@ func (ethash *Ethash) SetThreads(threads int) { // Hashrate implements PoW, returning the measured rate of the search invocations // per second over the last minute. func (ethash *Ethash) Hashrate() float64 { - return ethash.hashrate.Rate1() + return ethash.hashrate.Snapshot().Rate1() } // APIs implements consensus.Engine, returning the user facing RPC APIs. Currently diff --git a/core/blockchain.go b/core/blockchain.go index 2f731331decc..7aebc61155bd 100644 --- a/core/blockchain.go +++ b/core/blockchain.go @@ -56,13 +56,34 @@ import ( ) var ( - blockInsertTimer = metrics.NewRegisteredTimer("chain/inserts", nil) - CheckpointCh = make(chan int) - ErrNoGenesis = errors.New("Genesis not found in chain") + headBlockGauge = metrics.NewRegisteredGauge("chain/head/block", nil) + headHeaderGauge = metrics.NewRegisteredGauge("chain/head/header", nil) + headFastBlockGauge = metrics.NewRegisteredGauge("chain/head/receipt", nil) + + chainInfoGauge = metrics.NewRegisteredGaugeInfo("chain/info", nil) + + accountReadTimer = metrics.NewRegisteredResettingTimer("chain/account/reads", nil) + accountHashTimer = metrics.NewRegisteredResettingTimer("chain/account/hashes", nil) + accountUpdateTimer = metrics.NewRegisteredResettingTimer("chain/account/updates", nil) + accountCommitTimer = metrics.NewRegisteredResettingTimer("chain/account/commits", nil) + + storageReadTimer = metrics.NewRegisteredResettingTimer("chain/storage/reads", nil) + storageHashTimer = metrics.NewRegisteredResettingTimer("chain/storage/hashes", nil) + storageUpdateTimer = metrics.NewRegisteredResettingTimer("chain/storage/updates", nil) + storageCommitTimer = metrics.NewRegisteredResettingTimer("chain/storage/commits", nil) + + blockInsertTimer = metrics.NewRegisteredResettingTimer("chain/inserts", nil) + blockValidationTimer = metrics.NewRegisteredResettingTimer("chain/validation", nil) + blockExecutionTimer = metrics.NewRegisteredResettingTimer("chain/execution", nil) + blockWriteTimer = metrics.NewRegisteredResettingTimer("chain/write", nil) blockReorgMeter = metrics.NewRegisteredMeter("chain/reorg/executes", nil) blockReorgAddMeter = metrics.NewRegisteredMeter("chain/reorg/add", nil) blockReorgDropMeter = metrics.NewRegisteredMeter("chain/reorg/drop", nil) + + CheckpointCh = make(chan int) + + ErrNoGenesis = errors.New("Genesis not found in chain") ) const ( @@ -225,6 +246,10 @@ func NewBlockChain(db ethdb.Database, cacheConfig *CacheConfig, chainConfig *par if bc.genesisBlock == nil { return nil, ErrNoGenesis } + + // Update chain info data metrics + chainInfoGauge.Update(metrics.GaugeInfoValue{"chain_id": bc.chainConfig.ChainId.String()}) + if err := bc.loadLastState(); err != nil { return nil, err } @@ -340,6 +365,7 @@ func (bc *BlockChain) loadLastState() error { } // Everything seems to be fine, set as the head block bc.currentBlock.Store(currentBlock) + headBlockGauge.Update(int64(currentBlock.NumberU64())) // Restore the last known head header currentHeader := currentBlock.Header() @@ -359,9 +385,12 @@ func (bc *BlockChain) loadLastState() error { // Restore the last known head fast block bc.currentFastBlock.Store(currentBlock) + headFastBlockGauge.Update(int64(currentBlock.NumberU64())) + if head := GetHeadFastBlockHash(bc.db); head != (common.Hash{}) { if block := bc.GetBlockByHash(head); block != nil { bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) } } @@ -407,23 +436,28 @@ func (bc *BlockChain) SetHead(head uint64) error { // Rewind the block chain, ensuring we don't end up with a stateless head block if currentBlock := bc.CurrentBlock(); currentBlock != nil && currentHeader.Number.Uint64() < currentBlock.NumberU64() { bc.currentBlock.Store(bc.GetBlock(currentHeader.Hash(), currentHeader.Number.Uint64())) + headBlockGauge.Update(int64(currentHeader.Number.Uint64())) } if currentBlock := bc.CurrentBlock(); currentBlock != nil { if _, err := state.New(currentBlock.Root(), bc.stateCache); err != nil { // Rewound state missing, rolled back to before pivot, reset to genesis bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } } // Rewind the fast block in a simpleton way to the target head if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock != nil && currentHeader.Number.Uint64() < currentFastBlock.NumberU64() { bc.currentFastBlock.Store(bc.GetBlock(currentHeader.Hash(), currentHeader.Number.Uint64())) + headFastBlockGauge.Update(int64(currentHeader.Number.Uint64())) } // If either blocks reached nil, reset to the genesis state if currentBlock := bc.CurrentBlock(); currentBlock == nil { bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } if currentFastBlock := bc.CurrentFastBlock(); currentFastBlock == nil { bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) } currentBlock := bc.CurrentBlock() currentFastBlock := bc.CurrentFastBlock() @@ -448,6 +482,7 @@ func (bc *BlockChain) FastSyncCommitHead(hash common.Hash) error { // If all checks out, manually set the head block bc.mu.Lock() bc.currentBlock.Store(block) + headBlockGauge.Update(int64(block.NumberU64())) bc.mu.Unlock() log.Info("Committed new head block", "number", block.Number(), "hash", hash) @@ -578,9 +613,12 @@ func (bc *BlockChain) ResetWithGenesisBlock(genesis *types.Block) error { bc.genesisBlock = genesis bc.insert(bc.genesisBlock, false) bc.currentBlock.Store(bc.genesisBlock) + headBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) + bc.hc.SetGenesis(bc.genesisBlock.Header()) bc.hc.SetCurrentHeader(bc.genesisBlock.Header()) bc.currentFastBlock.Store(bc.genesisBlock) + headFastBlockGauge.Update(int64(bc.genesisBlock.NumberU64())) return nil } @@ -680,7 +718,9 @@ func (bc *BlockChain) insert(block *types.Block, writeBlock bool) { if writeBlock { rawdb.WriteBlock(bc.db, block) } + bc.currentBlock.Store(block) + headBlockGauge.Update(int64(block.NumberU64())) // save cache BlockSigners if bc.chainConfig.XDPoS != nil && !bc.chainConfig.IsTIPSigning(block.Number()) { @@ -698,6 +738,7 @@ func (bc *BlockChain) insert(block *types.Block, writeBlock bool) { log.Crit("Failed to insert head fast block hash", "err", err) } bc.currentFastBlock.Store(block) + headFastBlockGauge.Update(int64(block.NumberU64())) } } @@ -1035,10 +1076,12 @@ func (bc *BlockChain) Rollback(chain []common.Hash) { newFastBlock := bc.GetBlock(currentFastBlock.ParentHash(), currentFastBlock.NumberU64()-1) bc.currentFastBlock.Store(newFastBlock) WriteHeadFastBlockHash(bc.db, newFastBlock.Hash()) + headFastBlockGauge.Update(int64(newFastBlock.NumberU64())) } if currentBlock := bc.CurrentBlock(); currentBlock.Hash() == hash { newBlock := bc.GetBlock(currentBlock.ParentHash(), currentBlock.NumberU64()-1) bc.currentBlock.Store(newBlock) + headBlockGauge.Update(int64(newBlock.NumberU64())) rawdb.WriteHeadBlockHash(bc.db, newBlock.Hash()) } } @@ -1121,6 +1164,7 @@ func (bc *BlockChain) InsertReceiptChain(blockChain types.Blocks, receiptChain [ log.Crit("Failed to update head fast block hash", "err", err) } bc.currentFastBlock.Store(head) + headFastBlockGauge.Update(int64(head.NumberU64())) } } bc.mu.Unlock() @@ -1658,7 +1702,9 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] } feeCapacity := state.GetTRC21FeeCapacityFromStateWithCache(parent.Root(), statedb) // Process block using the parent state as reference point. + t0 := time.Now() receipts, logs, usedGas, err := bc.processor.Process(block, statedb, tradingState, bc.vmConfig, feeCapacity) + t1 := time.Now() if err != nil { bc.reportBlock(block, receipts, err) return i, events, coalescedLogs, err @@ -1669,19 +1715,41 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] bc.reportBlock(block, receipts, err) return i, events, coalescedLogs, err } + t2 := time.Now() proctime := time.Since(bstart) + // Write the block to the chain and get the status. status, err := bc.WriteBlockWithState(block, receipts, statedb, tradingState, lendingState) + t3 := time.Now() if err != nil { return i, events, coalescedLogs, err } + + // Update the metrics subsystem with all the measurements + accountReadTimer.Update(statedb.AccountReads) + accountHashTimer.Update(statedb.AccountHashes) + accountUpdateTimer.Update(statedb.AccountUpdates) + accountCommitTimer.Update(statedb.AccountCommits) + + storageReadTimer.Update(statedb.StorageReads) + storageHashTimer.Update(statedb.StorageHashes) + storageUpdateTimer.Update(statedb.StorageUpdates) + storageCommitTimer.Update(statedb.StorageCommits) + + trieAccess := statedb.AccountReads + statedb.AccountHashes + statedb.AccountUpdates + statedb.AccountCommits + trieAccess += statedb.StorageReads + statedb.StorageHashes + statedb.StorageUpdates + statedb.StorageCommits + + blockInsertTimer.UpdateSince(bstart) + blockExecutionTimer.Update(t1.Sub(t0) - trieAccess) + blockValidationTimer.Update(t2.Sub(t1)) + blockWriteTimer.Update(t3.Sub(t2)) + switch status { case CanonStatTy: log.Debug("Inserted new block from downloader", "number", block.Number(), "hash", block.Hash(), "uncles", len(block.Uncles()), "txs", len(block.Transactions()), "gas", block.GasUsed(), "elapsed", common.PrettyDuration(time.Since(bstart))) coalescedLogs = append(coalescedLogs, logs...) - blockInsertTimer.UpdateSince(bstart) events = append(events, ChainEvent{block, block.Hash(), logs}) lastCanon = block @@ -1695,8 +1763,6 @@ func (bc *BlockChain) insertChain(chain types.Blocks, verifySeals bool) (int, [] case SideStatTy: log.Debug("Inserted forked block from downloader", "number", block.Number(), "hash", block.Hash(), "diff", block.Difficulty(), "elapsed", common.PrettyDuration(time.Since(bstart)), "txs", len(block.Transactions()), "gas", block.GasUsed(), "uncles", len(block.Uncles())) - - blockInsertTimer.UpdateSince(bstart) events = append(events, ChainSideEvent{block}) bc.UpdateBlocksHashCache(block) } @@ -2015,7 +2081,6 @@ func (bc *BlockChain) insertBlock(block *types.Block) ([]interface{}, []*types.L "txs", len(block.Transactions()), "gas", block.GasUsed(), "elapsed", common.PrettyDuration(time.Since(block.ReceivedAt))) coalescedLogs = append(coalescedLogs, result.logs...) events = append(events, ChainEvent{block, block.Hash(), result.logs}) - // Only count canonical blocks for GC processing time bc.gcproc += result.proctime bc.UpdateBlocksHashCache(block) @@ -2026,10 +2091,8 @@ func (bc *BlockChain) insertBlock(block *types.Block) ([]interface{}, []*types.L case SideStatTy: log.Debug("Inserted forked block from fetcher", "number", block.Number(), "hash", block.Hash(), "diff", block.Difficulty(), "elapsed", common.PrettyDuration(time.Since(block.ReceivedAt)), "txs", len(block.Transactions()), "gas", block.GasUsed(), "uncles", len(block.Uncles())) - blockInsertTimer.Update(result.proctime) events = append(events, ChainSideEvent{block}) - bc.UpdateBlocksHashCache(block) } stats.processed++ diff --git a/core/headerchain.go b/core/headerchain.go index b3cdc10f68d5..51a876d2ecac 100644 --- a/core/headerchain.go +++ b/core/headerchain.go @@ -101,6 +101,7 @@ func NewHeaderChain(chainDb ethdb.Database, config *params.ChainConfig, engine c } } hc.currentHeaderHash = hc.CurrentHeader().Hash() + headHeaderGauge.Update(hc.CurrentHeader().Number.Int64()) return hc, nil } @@ -176,8 +177,10 @@ func (hc *HeaderChain) WriteHeader(header *types.Header) (status WriteStatus, er if err := WriteHeadHeaderHash(hc.chainDb, hash); err != nil { log.Crit("Failed to insert head header hash", "err", err) } + hc.currentHeaderHash = hash hc.currentHeader.Store(types.CopyHeader(header)) + headHeaderGauge.Update(header.Number.Int64()) status = CanonStatTy } else { @@ -392,8 +395,10 @@ func (hc *HeaderChain) SetCurrentHeader(head *types.Header) { if err := WriteHeadHeaderHash(hc.chainDb, head.Hash()); err != nil { log.Crit("Failed to insert head header hash", "err", err) } + hc.currentHeader.Store(head) hc.currentHeaderHash = head.Hash() + headHeaderGauge.Update(head.Number.Int64()) } // DeleteCallback is a callback function that is called by SetHead before @@ -432,6 +437,7 @@ func (hc *HeaderChain) SetHead(head uint64, delFn DeleteCallback) { hc.currentHeader.Store(hc.genesisHeader) } hc.currentHeaderHash = hc.CurrentHeader().Hash() + headHeaderGauge.Update(hc.CurrentHeader().Number.Int64()) if err := WriteHeadHeaderHash(hc.chainDb, hc.currentHeaderHash); err != nil { log.Crit("Failed to reset head header hash", "err", err) diff --git a/core/state/state_object.go b/core/state/state_object.go index ca695efe3b85..bb19597cc6f9 100644 --- a/core/state/state_object.go +++ b/core/state/state_object.go @@ -21,6 +21,7 @@ import ( "fmt" "io" "math/big" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" @@ -173,6 +174,8 @@ func (s *stateObject) GetCommittedState(db Database, key common.Hash) common.Has if s.fakeStorage != nil { return s.fakeStorage[key] } + // Track the amount of time wasted on reading the storge trie + defer func(start time.Time) { s.db.StorageReads += time.Since(start) }(time.Now()) value := common.Hash{} // Load from DB in case it is missing. enc, err := s.getTrie(db).TryGet(key[:]) @@ -269,6 +272,8 @@ func (s *stateObject) setState(key, value common.Hash) { // updateTrie writes cached storage modifications into the object's storage trie. func (s *stateObject) updateTrie(db Database) Trie { + // Track the amount of time wasted on updating the storge trie + defer func(start time.Time) { s.db.StorageUpdates += time.Since(start) }(time.Now()) tr := s.getTrie(db) for key, value := range s.dirtyStorage { delete(s.dirtyStorage, key) @@ -286,6 +291,10 @@ func (s *stateObject) updateTrie(db Database) Trie { // UpdateRoot sets the trie root to the current root hash of func (s *stateObject) updateRoot(db Database) { s.updateTrie(db) + + // Track the amount of time wasted on hashing the storge trie + defer func(start time.Time) { s.db.StorageHashes += time.Since(start) }(time.Now()) + s.data.Root = s.trie.Hash() } @@ -296,6 +305,9 @@ func (s *stateObject) CommitTrie(db Database) error { if s.dbErr != nil { return s.dbErr } + // Track the amount of time wasted on committing the storge trie + defer func(start time.Time) { s.db.StorageCommits += time.Since(start) }(time.Now()) + root, err := s.trie.Commit(nil) if err == nil { s.data.Root = root diff --git a/core/state/statedb.go b/core/state/statedb.go index 7782643f8a0e..d357e6e1bf58 100644 --- a/core/state/statedb.go +++ b/core/state/statedb.go @@ -22,6 +22,7 @@ import ( "math/big" "sort" "sync" + "time" "github.com/XinFinOrg/XDPoSChain/common" "github.com/XinFinOrg/XDPoSChain/core/types" @@ -80,6 +81,16 @@ type StateDB struct { nextRevisionId int lock sync.Mutex + + // Measurements gathered during execution for debugging purposes + AccountReads time.Duration + AccountHashes time.Duration + AccountUpdates time.Duration + AccountCommits time.Duration + StorageReads time.Duration + StorageHashes time.Duration + StorageUpdates time.Duration + StorageCommits time.Duration } type AccountInfo struct { @@ -446,7 +457,12 @@ func (s *StateDB) GetTransientState(addr common.Address, key common.Hash) common // updateStateObject writes the given object to the trie. func (s *StateDB) updateStateObject(stateObject *stateObject) { + // Track the amount of time wasted on updating the account from the trie + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + + // Encode the account and update the account trie addr := stateObject.Address() + data, err := rlp.EncodeToBytes(stateObject) if err != nil { panic(fmt.Errorf("can't encode object at %x: %v", addr[:], err)) @@ -456,7 +472,12 @@ func (s *StateDB) updateStateObject(stateObject *stateObject) { // deleteStateObject removes the given object from the state trie. func (s *StateDB) deleteStateObject(stateObject *stateObject) { + // Track the amount of time wasted on deleting the account from the trie + defer func(start time.Time) { s.AccountUpdates += time.Since(start) }(time.Now()) + + // Delete the account from the trie stateObject.deleted = true + addr := stateObject.Address() s.setError(s.trie.TryDelete(addr[:])) } @@ -471,15 +492,17 @@ func (s *StateDB) DeleteAddress(addr common.Address) { // Retrieve a state object given my the address. Returns nil if not found. func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) { - // Prefer 'live' objects. + // Prefer live objects if any is available if obj := s.stateObjects[addr]; obj != nil { if obj.deleted { return nil } return obj } + // Track the amount of time wasted on loading the object from the database + defer func(start time.Time) { s.AccountReads += time.Since(start) }(time.Now()) - // Load the object from the database. + // Load the object from the database enc, err := s.trie.TryGet(addr[:]) if len(enc) == 0 { s.setError(err) @@ -490,7 +513,7 @@ func (s *StateDB) getStateObject(addr common.Address) (stateObject *stateObject) log.Error("Failed to decode state object", "addr", addr, "err", err) return nil } - // Insert into the live set. + // Insert into the live set obj := newObject(s, addr, data, s.MarkStateObjectDirty) s.setStateObject(obj) return obj @@ -672,6 +695,10 @@ func (s *StateDB) Finalise(deleteEmptyObjects bool) { // goes into transaction receipts. func (s *StateDB) IntermediateRoot(deleteEmptyObjects bool) common.Hash { s.Finalise(deleteEmptyObjects) + + // Track the amount of time wasted on hashing the account trie + defer func(start time.Time) { s.AccountHashes += time.Since(start) }(time.Now()) + return s.trie.Hash() } @@ -714,7 +741,7 @@ func (s *StateDB) clearJournalAndRefund() { func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) { defer s.clearJournalAndRefund() - // Commit objects to the trie. + // Commit objects to the trie, measuring the elapsed time for addr, stateObject := range s.stateObjects { _, isDirty := s.stateObjectsDirty[addr] switch { @@ -737,7 +764,9 @@ func (s *StateDB) Commit(deleteEmptyObjects bool) (root common.Hash, err error) } delete(s.stateObjectsDirty, addr) } - // Write trie changes. + // Write the account trie changes, measuing the amount of wasted time + defer func(start time.Time) { s.AccountCommits += time.Since(start) }(time.Now()) + root, err = s.trie.Commit(func(leaf []byte, parent common.Hash) error { var account Account if err := rlp.DecodeBytes(leaf, &account); err != nil { diff --git a/eth/downloader/downloader.go b/eth/downloader/downloader.go index 1a500142c839..f5fb6d1c5851 100644 --- a/eth/downloader/downloader.go +++ b/eth/downloader/downloader.go @@ -1586,7 +1586,7 @@ func (d *Downloader) DeliverNodeData(id string, data [][]byte) (err error) { } // deliver injects a new batch of data received from a remote node. -func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter metrics.Meter) (err error) { +func (d *Downloader) deliver(id string, destCh chan dataPack, packet dataPack, inMeter, dropMeter *metrics.Meter) (err error) { // Update the delivery metrics for both good and failed deliveries inMeter.Mark(int64(packet.Items())) defer func() { diff --git a/eth/downloader/queue.go b/eth/downloader/queue.go index 7f55ce733574..b21460c2efd6 100644 --- a/eth/downloader/queue.go +++ b/eth/downloader/queue.go @@ -650,7 +650,7 @@ func (q *queue) ExpireReceipts(timeout time.Duration) map[string]int { // Note, this method expects the queue lock to be already held. The // reason the lock is not obtained in here is because the parameters already need // to access the queue, so they already need a lock anyway. -func (q *queue) expire(timeout time.Duration, pendPool map[string]*fetchRequest, taskQueue *prque.Prque, timeoutMeter metrics.Meter) map[string]int { +func (q *queue) expire(timeout time.Duration, pendPool map[string]*fetchRequest, taskQueue *prque.Prque, timeoutMeter *metrics.Meter) map[string]int { // Iterate over the expired requests and return each to the queue expiries := make(map[string]int) for id, request := range pendPool { @@ -805,7 +805,7 @@ func (q *queue) DeliverReceipts(id string, receiptList [][]*types.Receipt) (int, // reason the lock is not obtained in here is because the parameters already need // to access the queue, so they already need a lock anyway. func (q *queue) deliver(id string, taskPool map[common.Hash]*types.Header, taskQueue *prque.Prque, - pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, reqTimer metrics.Timer, + pendPool map[string]*fetchRequest, donePool map[common.Hash]struct{}, reqTimer *metrics.Timer, results int, reconstruct func(header *types.Header, index int, result *fetchResult) error) (int, error) { // Short circuit if the data was never requested diff --git a/eth/metrics.go b/eth/metrics.go index 2f3bd6bfb839..59d9258a0625 100644 --- a/eth/metrics.go +++ b/eth/metrics.go @@ -66,7 +66,7 @@ type meteredMsgReadWriter struct { // newMeteredMsgWriter wraps a p2p MsgReadWriter with metering support. If the // metrics system is disabled, this function returns the original object. func newMeteredMsgWriter(rw p2p.MsgReadWriter) p2p.MsgReadWriter { - if !metrics.Enabled { + if !metrics.Enabled() { return rw } return &meteredMsgReadWriter{MsgReadWriter: rw} diff --git a/ethdb/leveldb/leveldb.go b/ethdb/leveldb/leveldb.go index 601ffc94a1df..cd4603982695 100644 --- a/ethdb/leveldb/leveldb.go +++ b/ethdb/leveldb/leveldb.go @@ -62,18 +62,18 @@ type Database struct { fn string // filename for reporting db *leveldb.DB // LevelDB instance - compTimeMeter metrics.Meter // Meter for measuring the total time spent in database compaction - compReadMeter metrics.Meter // Meter for measuring the data read during compaction - compWriteMeter metrics.Meter // Meter for measuring the data written during compaction - writeDelayNMeter metrics.Meter // Meter for measuring the write delay number due to database compaction - writeDelayMeter metrics.Meter // Meter for measuring the write delay duration due to database compaction - diskSizeGauge metrics.Gauge // Gauge for tracking the size of all the levels in the database - diskReadMeter metrics.Meter // Meter for measuring the effective amount of data read - diskWriteMeter metrics.Meter // Meter for measuring the effective amount of data written - memCompGauge metrics.Gauge // Gauge for tracking the number of memory compaction - level0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in level0 - nonlevel0CompGauge metrics.Gauge // Gauge for tracking the number of table compaction in non0 level - seekCompGauge metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt + compTimeMeter *metrics.Meter // Meter for measuring the total time spent in database compaction + compReadMeter *metrics.Meter // Meter for measuring the data read during compaction + compWriteMeter *metrics.Meter // Meter for measuring the data written during compaction + writeDelayNMeter *metrics.Meter // Meter for measuring the write delay number due to database compaction + writeDelayMeter *metrics.Meter // Meter for measuring the write delay duration due to database compaction + diskSizeGauge *metrics.Gauge // Gauge for tracking the size of all the levels in the database + diskReadMeter *metrics.Meter // Meter for measuring the effective amount of data read + diskWriteMeter *metrics.Meter // Meter for measuring the effective amount of data written + memCompGauge *metrics.Gauge // Gauge for tracking the number of memory compaction + level0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in level0 + nonlevel0CompGauge *metrics.Gauge // Gauge for tracking the number of table compaction in non0 level + seekCompGauge *metrics.Gauge // Gauge for tracking the number of table compaction caused by read opt quitLock sync.Mutex // Mutex protecting the quit channel access quitChan chan chan error // Quit channel to stop the metrics collection before closing the database diff --git a/go.mod b/go.mod index d849d578c82b..28a24f8b6b38 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,6 @@ require ( github.com/hashicorp/golang-lru v0.5.4 github.com/holiman/uint256 v1.2.4 github.com/huin/goupnp v1.3.0 - github.com/influxdata/influxdb v1.7.9 github.com/jackpal/go-nat-pmp v1.0.2 github.com/julienschmidt/httprouter v1.3.0 github.com/karalabe/hid v1.0.0 @@ -50,6 +49,8 @@ require ( github.com/deckarep/golang-set v1.8.0 github.com/dop251/goja v0.0.0-20200721192441-a695b0cdd498 github.com/ethereum/c-kzg-4844 v0.4.0 + github.com/influxdata/influxdb-client-go/v2 v2.4.0 + github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c github.com/kylelemons/godebug v1.1.0 github.com/mattn/go-isatty v0.0.17 github.com/shirou/gopsutil v3.21.4-0.20210419000835-c7a38de76ee5+incompatible @@ -64,10 +65,12 @@ require ( github.com/consensys/bavard v0.1.13 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 // indirect + github.com/deepmap/oapi-codegen v1.6.0 // indirect github.com/dlclark/regexp2 v1.10.0 // indirect github.com/go-ole/go-ole v1.2.5 // indirect github.com/go-sourcemap/sourcemap v2.1.3+incompatible // indirect github.com/google/uuid v1.3.0 // indirect + github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect @@ -79,11 +82,15 @@ require ( github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 // indirect github.com/supranational/blst v0.3.11 // indirect + github.com/tklauser/go-sysconf v0.3.14 // indirect + github.com/tklauser/numcpus v0.8.0 // indirect github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 // indirect golang.org/x/mod v0.17.0 // indirect + golang.org/x/net v0.25.0 // indirect golang.org/x/term v0.26.0 // indirect golang.org/x/text v0.20.0 // indirect google.golang.org/protobuf v1.31.0 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect gotest.tools v2.2.0+incompatible // indirect rsc.io/tmplfunc v0.0.3 // indirect diff --git a/go.sum b/go.sum index 55a922e8cc24..68019c92aba9 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,7 @@ github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46t github.com/crate-crypto/go-kzg-4844 v0.7.0 h1:C0vgZRk4q4EZ/JgPfzuSoxdCq3C3mOZMBShovmncxvA= github.com/crate-crypto/go-kzg-4844 v0.7.0/go.mod h1:1kMhvPgI0Ky3yIa+9lFySEBUBXkYxeOi8ZF1sYioxhc= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= +github.com/cyberdelia/templates v0.0.0-20141128023046-ca7fffd4298c/go.mod h1:GyV+0YP4qX0UQ7r2MoYZ+AvYDp12OF5yg4q8rGnyNh4= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -35,6 +36,9 @@ github.com/decred/dcrd/crypto/blake256 v1.0.0 h1:/8DMNYp9SGi5f0w7uCm6d6M4OU2rGFK github.com/decred/dcrd/crypto/blake256 v1.0.0/go.mod h1:sQl2p6Y26YV+ZOcSTP6thNdn47hh8kt6rqSlvmrXFAc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1 h1:YLtO71vCjJRCBcrPMtQ9nqBsqpA1m5sE92cU+pd5Mcc= github.com/decred/dcrd/dcrec/secp256k1/v4 v4.0.1/go.mod h1:hyedUtir6IdtD/7lIxGeCxkaw7y45JueMRL4DIyJDKs= +github.com/deepmap/oapi-codegen v1.6.0 h1:w/d1ntwh91XI0b/8ja7+u5SvA4IFfM0UNNLmiDR1gg0= +github.com/deepmap/oapi-codegen v1.6.0/go.mod h1:ryDa9AgbELGeB+YEXE1dR53yAjHwFvE9iAUlWl9Al3M= +github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/docker/docker v1.4.2-0.20180625184442-8e610b2b55bf h1:sh8rkQZavChcmakYiSlqu2425CHyFXLZZnvm7PDpU8M= @@ -50,10 +54,15 @@ github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYF github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/getkin/kin-openapi v0.53.0/go.mod h1:7Yn5whZr5kJi6t+kShccXS8ae1APpYTW6yheSwk8Yi4= +github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8 h1:DujepqpGd1hyOd7aW59XpK7Qymp8iy83xq74fLr21is= github.com/globalsign/mgo v0.0.0-20181015135952-eeefdecb41b8/go.mod h1:xkRDCp4j0OGD1HRkm4kmhM+pmpv3AKq5SU7GMg4oO/Q= +github.com/go-chi/chi/v5 v5.0.0/go.mod h1:BBug9lr0cqtdAhsu6R4AAdvufI0/XBzAQSsUqJpoZOs= github.com/go-ole/go-ole v1.2.5 h1:t4MGB5xEDZvXI+0rMjjsfBsD7yAgp/s9ZDkL1JndXwY= github.com/go-ole/go-ole v1.2.5/go.mod h1:pprOEPIfldk/42T2oK7lQ4v4JSDwmV0As9GaiUsvbm0= +github.com/go-openapi/jsonpointer v0.19.5/go.mod h1:Pl9vOtqEWErmShwVjC8pYs9cog34VGT37dQOVbmoatg= +github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-sourcemap/sourcemap v2.1.3+incompatible h1:W1iEw64niKVGogNgBN3ePyLFfuisuzeidWPMPWmECqU= github.com/go-sourcemap/sourcemap v2.1.3+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -69,6 +78,7 @@ github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiu github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb h1:PBC98N2aIaM3XXiurYmW7fx4GZkL8feAMVq7nEjURHk= github.com/golang/snappy v0.0.5-0.20220116011046-fa5810519dcb/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golangci/lint-1 v0.0.0-20181222135242-d2cdd8c08219/go.mod h1:/X8TswGSh1pIozq4ZwCfxS0WA5JGXguxk94ar/4c87Y= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= @@ -79,6 +89,7 @@ github.com/google/subcommands v1.2.0/go.mod h1:ZjhPrFU+Olkh9WazFPsl27BQ4UPiG37m3 github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/google/uuid v1.3.0 h1:t6JiXgmwXMjEs8VusXIJk2BXHsn+wx8BZdTaoZ5fu7I= github.com/google/uuid v1.3.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gorilla/mux v1.8.0/go.mod h1:DVbg23sWSpFRCP0SfiEN6jmj59UnW/n46BH5rLB71So= github.com/gorilla/websocket v1.4.2 h1:+/TMaTYc4QFitKJxsQ7Yye35DkWvkdLcvGKqM+x0Ufc= github.com/gorilla/websocket v1.4.2/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= @@ -88,14 +99,19 @@ github.com/holiman/uint256 v1.2.4/go.mod h1:EOMSn4q6Nyt9P6efbI3bueV4e1b3dGlUCXei github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/huin/goupnp v1.3.0 h1:UvLUlWDNpoUdYzb2TCn+MuTWtcjXKSza2n6CBdQ0xXc= github.com/huin/goupnp v1.3.0/go.mod h1:gnGPsThkYa7bFi/KWmEysQRf48l2dvR5bxr2OFckNX8= -github.com/influxdata/influxdb v1.7.9 h1:uSeBTNO4rBkbp1Be5FKRsAmglM9nlx25TzVQRQt1An4= -github.com/influxdata/influxdb v1.7.9/go.mod h1:qZna6X/4elxqT3yI9iZYdZrWWdeFOOprn86kgg4+IzY= +github.com/influxdata/influxdb-client-go/v2 v2.4.0 h1:HGBfZYStlx3Kqvsv1h2pJixbCl/jhnFtxpKFAv9Tu5k= +github.com/influxdata/influxdb-client-go/v2 v2.4.0/go.mod h1:vLNHdxTJkIf2mSLvGrpj8TCcISApPoXkaxP8g9uRlW8= +github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c h1:qSHzRbhzK8RdXOsAdfDgO49TtqC1oZ+acxPrkfTxcCs= +github.com/influxdata/influxdb1-client v0.0.0-20220302092344-a9ab5670611c/go.mod h1:qj24IKcXYK6Iy9ceXlo3Tc+vtHo9lIhSX5JddghvEPo= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839 h1:W9WBk7wlPfJLvMCdtV4zPulc4uCPrlywQOmbFOhgQNU= +github.com/influxdata/line-protocol v0.0.0-20200327222509-2487e7298839/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo= github.com/jackpal/go-nat-pmp v1.0.2 h1:KzKSgb7qkJvOUTqYl9/Hg/me3pWgBmERKrTGD7BdWus= github.com/jackpal/go-nat-pmp v1.0.2/go.mod h1:QPH045xvCAeXUZOxsnwmrtiCoxIr9eob+4orBN1SBKc= github.com/julienschmidt/httprouter v1.3.0 h1:U0609e9tgbseu3rBINet9P48AI/D3oJs4dN7jwJOQ1U= github.com/julienschmidt/httprouter v1.3.0/go.mod h1:JR6WtHb+2LUe8TCKY3cZOxFyyO8IZAc4RVcycCCAKdM= github.com/karalabe/hid v1.0.0 h1:+/CIMNXhSU/zIJgnIvBD2nKHxS/bnRHhhs9xBryLpPo= github.com/karalabe/hid v1.0.0/go.mod h1:Vr51f8rUOLYrfrWDFlV12GGQgM5AT8sVh+2fY4MPeu8= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= @@ -105,11 +121,21 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/labstack/echo/v4 v4.2.1/go.mod h1:AA49e0DZ8kk5jTOOCKNuPR6oTnBS0dYiM4FW1e6jwpg= +github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k= github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c= github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8= +github.com/mailru/easyjson v0.0.0-20190614124828-94de47d64c63/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/mailru/easyjson v0.0.0-20190626092158-b2ccc519800e/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= +github.com/matryer/moq v0.0.0-20190312154309-6cfb0558e1bd/go.mod h1:9ELz6aaclSIGnZBoaSLZ3NAl1VTufbOrXBPvtcy6WiQ= +github.com/mattn/go-colorable v0.1.2/go.mod h1:U0ppj6V5qS13XJ6of8GYAs25YV2eR4EVcfRqFIhoBtE= +github.com/mattn/go-colorable v0.1.7/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.8/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= @@ -142,6 +168,7 @@ github.com/pborman/uuid v1.2.0/go.mod h1:X/NO0urCmaxf9VXbdlT7C2Yzkj2IKimNn4k+gtP github.com/peterh/liner v1.1.1-0.20190123174540-a2c9a5303de7 h1:oYW+YCJ1pachXTQmzR3rNLYGGz4g/UgFcjb28p/viDM= github.com/peterh/liner v1.1.1-0.20190123174540-a2c9a5303de7/go.mod h1:CRroGNssyjTd/qIG2FyxByd2S8JEAZXBl4qUrZf8GS0= github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= +github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -166,60 +193,88 @@ github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 h1:njlZPzLwU639 github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3/go.mod h1:hpGUWaI9xL8pRQCTXQgocU38Qw1g0Us7n5PxxTwTCYU= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= github.com/supranational/blst v0.3.11 h1:LyU6FolezeWAhvQk0k6O/d49jqgO52MSDDfYgbeoEm4= github.com/supranational/blst v0.3.11/go.mod h1:jZJtfjgudtNl4en1tzwPIV3KjUnQUvG3/j+w+fVonLw= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7 h1:epCh84lMvA70Z7CTTCmYQn2CKbY8j86K7/FAIr141uY= github.com/syndtr/goleveldb v1.0.1-0.20210819022825-2ae1ddf74ef7/go.mod h1:q4W45IWZaF22tdD+VEXcAWRA037jwmWEB5VWYORlTpc= +github.com/tklauser/go-sysconf v0.3.14 h1:g5vzr9iPFFz24v2KZXs/pvpvh8/V9Fw6vQK5ZZb78yU= +github.com/tklauser/go-sysconf v0.3.14/go.mod h1:1ym4lWMLUOhuBOPGtRcJm7tEGX4SCYNEEEtghGG/8uY= +github.com/tklauser/numcpus v0.8.0 h1:Mx4Wwe/FjZLeQsK/6kt2EOepwwSl7SmJrK5bV/dXYgY= +github.com/tklauser/numcpus v0.8.0/go.mod h1:ZJZlAY+dmR4eut8epnzf0u/VwodKmryxR8txiloSqBE= github.com/urfave/cli/v2 v2.27.5 h1:WoHEJLdsXr6dDWoJgMq/CboDmyY/8HMMH1fTECbih+w= github.com/urfave/cli/v2 v2.27.5/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasttemplate v1.0.1/go.mod h1:UQGH1tvbgY+Nz5t2n7tXsz52dQxojPUpymEIMZ47gx8= +github.com/valyala/fasttemplate v1.2.1/go.mod h1:KHLXt3tVN2HBp8eijSv/kGJopbvo7S+qRAEEKiv+SiQ= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20200820211705-5c72a883971a/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20201221181555-eec23a3978ad/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20210119194325-5f4716e94777/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180926160741-c2ed4eda69e7/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190916202348-b4ddaad3f8a3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200519105757-fe76b779f299/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200814200057-3d37ad5750ed/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200826173525-f9321e4c35a6/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/time v0.0.0-20201208040808-7e3f01d25324/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= +golang.org/x/time v0.0.0-20210220033141-f8bda1e9f3ba/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191125144606-a911d9008d1f/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d h1:vU5i/LfpvrRCpgM/VPfJLg5KjxD3E+hfT1SH+d9zLwg= golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 h1:go1bK/D/BFZV2I8cIQd1NKEZ+0owSTG1fDTci4IqFcE= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -234,6 +289,7 @@ google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQ google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= @@ -245,6 +301,7 @@ gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6 h1:a6cXbcDDUk gopkg.in/olebedev/go-duktape.v3 v3.0.0-20200619000410-60c24ae608a6/go.mod h1:uAJfkITjFhyEEuUfm7bsmCZRbW5WRq8s9EY8HZ6hCns= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= diff --git a/internal/debug/flags.go b/internal/debug/flags.go index de8d587f0852..cf5b49e4754e 100644 --- a/internal/debug/flags.go +++ b/internal/debug/flags.go @@ -312,17 +312,10 @@ func Setup(ctx *cli.Context) error { // pprof server if ctx.Bool(pprofFlag.Name) { - // Hook go-metrics into expvar on any /debug/metrics request, load all vars - // from the registry into expvar, and execute regular expvar handler. - exp.Exp(metrics.DefaultRegistry) - address := fmt.Sprintf("%s:%d", ctx.String(pprofAddrFlag.Name), ctx.Int(pprofPortFlag.Name)) - go func() { - log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) - if err := http.ListenAndServe(address, nil); err != nil { - log.Error("Failure in running pprof server", "err", err) - } - }() + // This context value ("metrics-addr") represents the utils.MetricsHTTPFlag.Name. + // It cannot be imported because it will cause a cyclical dependency. + StartPProf(address, !ctx.IsSet("metrics-addr") && !ctx.IsSet("metrics.addr")) } if len(logFile) > 0 || rotation { @@ -332,6 +325,20 @@ func Setup(ctx *cli.Context) error { return nil } +func StartPProf(address string, withMetrics bool) { + // Hook go-metrics into expvar on any /debug/metrics request, load all vars + // from the registry into expvar, and execute regular expvar handler. + if withMetrics { + exp.Exp(metrics.DefaultRegistry) + } + log.Info("Starting pprof server", "addr", fmt.Sprintf("http://%s/debug/pprof", address)) + go func() { + if err := http.ListenAndServe(address, nil); err != nil { + log.Error("Failure in running pprof server", "err", err) + } + }() +} + // Exit stops all running profiles, flushing their output to the // respective file. func Exit() { diff --git a/les/metrics.go b/les/metrics.go index 4ba1af2475c5..9215c712eb71 100644 --- a/les/metrics.go +++ b/les/metrics.go @@ -74,7 +74,7 @@ type meteredMsgReadWriter struct { // newMeteredMsgWriter wraps a p2p MsgReadWriter with metering support. If the // metrics system is disabled, this function returns the original object. func newMeteredMsgWriter(rw p2p.MsgReadWriter) p2p.MsgReadWriter { - if !metrics.Enabled { + if !metrics.Enabled() { return rw } return &meteredMsgReadWriter{MsgReadWriter: rw} diff --git a/metrics/README.md b/metrics/README.md index bc2a45a8382d..85b119470a92 100644 --- a/metrics/README.md +++ b/metrics/README.md @@ -5,7 +5,7 @@ go-metrics Go port of Coda Hale's Metrics library: . -Documentation: . +Documentation: . Usage ----- @@ -100,24 +100,6 @@ go influxdb.InfluxDB(metrics.DefaultRegistry, ) ``` -Periodically upload every metric to Librato using the [Librato client](https://github.com/mihasya/go-metrics-librato): - -**Note**: the client included with this repository under the `librato` package -has been deprecated and moved to the repository linked above. - -```go -import "github.com/mihasya/go-metrics-librato" - -go librato.Librato(metrics.DefaultRegistry, - 10e9, // interval - "example@example.com", // account owner email address - "token", // Librato API token - "hostname", // source - []float64{0.95}, // percentiles to send - time.Millisecond, // time unit -) -``` - Periodically emit every metric to StatHat: ```go @@ -128,7 +110,7 @@ go stathat.Stathat(metrics.DefaultRegistry, 10e9, "example@example.com") Maintain all metrics along with expvars at `/debug/metrics`: -This uses the same mechanism as [the official expvar](http://golang.org/pkg/expvar/) +This uses the same mechanism as [the official expvar](https://golang.org/pkg/expvar/) but exposed under `/debug/metrics`, which shows a json representation of all your usual expvars as well as all your go-metrics. @@ -157,7 +139,6 @@ Publishing Metrics Clients are available for the following destinations: -* Librato - https://github.com/mihasya/go-metrics-librato * Graphite - https://github.com/cyberdelia/go-metrics-graphite * InfluxDB - https://github.com/vrischmann/go-metrics-influxdb * Ganglia - https://github.com/appscode/metlia diff --git a/metrics/config.go b/metrics/config.go index 169c683a97b5..6acb985c16e5 100644 --- a/metrics/config.go +++ b/metrics/config.go @@ -19,9 +19,20 @@ package metrics // Config contains the configuration for the metric collection. type Config struct { Enabled bool `toml:",omitempty"` - EnabledExpensive bool `toml:",omitempty"` + EnabledExpensive bool `toml:"-"` HTTP string `toml:",omitempty"` Port int `toml:",omitempty"` + EnableInfluxDB bool `toml:",omitempty"` + InfluxDBEndpoint string `toml:",omitempty"` + InfluxDBDatabase string `toml:",omitempty"` + InfluxDBUsername string `toml:",omitempty"` + InfluxDBPassword string `toml:",omitempty"` + InfluxDBTags string `toml:",omitempty"` + + EnableInfluxDBV2 bool `toml:",omitempty"` + InfluxDBToken string `toml:",omitempty"` + InfluxDBBucket string `toml:",omitempty"` + InfluxDBOrganization string `toml:",omitempty"` } // DefaultConfig is the default config for metrics used in go-ethereum. @@ -30,4 +41,16 @@ var DefaultConfig = Config{ EnabledExpensive: false, HTTP: "127.0.0.1", Port: 6060, + EnableInfluxDB: false, + InfluxDBEndpoint: "http://localhost:8086", + InfluxDBDatabase: "xdc", + InfluxDBUsername: "test", + InfluxDBPassword: "test", + InfluxDBTags: "host=localhost", + + // influxdbv2-specific flags + EnableInfluxDBV2: false, + InfluxDBToken: "test", + InfluxDBBucket: "xdc", + InfluxDBOrganization: "xdc", } diff --git a/metrics/counter.go b/metrics/counter.go index c7f2b4bd3aa3..0f373b0d9289 100644 --- a/metrics/counter.go +++ b/metrics/counter.go @@ -1,112 +1,58 @@ package metrics -import "sync/atomic" - -// Counters hold an int64 value that can be incremented and decremented. -type Counter interface { - Clear() - Count() int64 - Dec(int64) - Inc(int64) - Snapshot() Counter -} +import ( + "sync/atomic" +) // GetOrRegisterCounter returns an existing Counter or constructs and registers -// a new StandardCounter. -func GetOrRegisterCounter(name string, r Registry) Counter { - if nil == r { +// a new Counter. +func GetOrRegisterCounter(name string, r Registry) *Counter { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewCounter).(Counter) + return r.GetOrRegister(name, NewCounter).(*Counter) } -// NewCounter constructs a new StandardCounter. -func NewCounter() Counter { - if !Enabled { - return NilCounter{} - } - return &StandardCounter{0} +// NewCounter constructs a new Counter. +func NewCounter() *Counter { + return new(Counter) } -// NewRegisteredCounter constructs and registers a new StandardCounter. -func NewRegisteredCounter(name string, r Registry) Counter { +// NewRegisteredCounter constructs and registers a new Counter. +func NewRegisteredCounter(name string, r Registry) *Counter { c := NewCounter() - if nil == r { + if r == nil { r = DefaultRegistry } r.Register(name, c) return c } -// CounterSnapshot is a read-only copy of another Counter. +// CounterSnapshot is a read-only copy of a Counter. type CounterSnapshot int64 -// Clear panics. -func (CounterSnapshot) Clear() { - panic("Clear called on a CounterSnapshot") -} - // Count returns the count at the time the snapshot was taken. func (c CounterSnapshot) Count() int64 { return int64(c) } -// Dec panics. -func (CounterSnapshot) Dec(int64) { - panic("Dec called on a CounterSnapshot") -} - -// Inc panics. -func (CounterSnapshot) Inc(int64) { - panic("Inc called on a CounterSnapshot") -} - -// Snapshot returns the snapshot. -func (c CounterSnapshot) Snapshot() Counter { return c } - -// NilCounter is a no-op Counter. -type NilCounter struct{} - -// Clear is a no-op. -func (NilCounter) Clear() {} - -// Count is a no-op. -func (NilCounter) Count() int64 { return 0 } - -// Dec is a no-op. -func (NilCounter) Dec(i int64) {} - -// Inc is a no-op. -func (NilCounter) Inc(i int64) {} - -// Snapshot is a no-op. -func (NilCounter) Snapshot() Counter { return NilCounter{} } - -// StandardCounter is the standard implementation of a Counter and uses the -// sync/atomic package to manage a single int64 value. -type StandardCounter struct { - count int64 -} +// Counter hold an int64 value that can be incremented and decremented. +type Counter atomic.Int64 // Clear sets the counter to zero. -func (c *StandardCounter) Clear() { - atomic.StoreInt64(&c.count, 0) -} - -// Count returns the current count. -func (c *StandardCounter) Count() int64 { - return atomic.LoadInt64(&c.count) +func (c *Counter) Clear() { + (*atomic.Int64)(c).Store(0) } // Dec decrements the counter by the given amount. -func (c *StandardCounter) Dec(i int64) { - atomic.AddInt64(&c.count, -i) +func (c *Counter) Dec(i int64) { + (*atomic.Int64)(c).Add(-i) } // Inc increments the counter by the given amount. -func (c *StandardCounter) Inc(i int64) { - atomic.AddInt64(&c.count, i) +func (c *Counter) Inc(i int64) { + (*atomic.Int64)(c).Add(i) } // Snapshot returns a read-only copy of the counter. -func (c *StandardCounter) Snapshot() Counter { - return CounterSnapshot(c.Count()) +func (c *Counter) Snapshot() CounterSnapshot { + return CounterSnapshot((*atomic.Int64)(c).Load()) } diff --git a/metrics/counter_float64.go b/metrics/counter_float64.go new file mode 100644 index 000000000000..91c4215c4df6 --- /dev/null +++ b/metrics/counter_float64.go @@ -0,0 +1,69 @@ +package metrics + +import ( + "math" + "sync/atomic" +) + +// GetOrRegisterCounterFloat64 returns an existing *CounterFloat64 or constructs and registers +// a new CounterFloat64. +func GetOrRegisterCounterFloat64(name string, r Registry) *CounterFloat64 { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewCounterFloat64).(*CounterFloat64) +} + +// NewCounterFloat64 constructs a new CounterFloat64. +func NewCounterFloat64() *CounterFloat64 { + return new(CounterFloat64) +} + +// NewRegisteredCounterFloat64 constructs and registers a new CounterFloat64. +func NewRegisteredCounterFloat64(name string, r Registry) *CounterFloat64 { + c := NewCounterFloat64() + if r == nil { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// CounterFloat64Snapshot is a read-only copy of a float64 counter. +type CounterFloat64Snapshot float64 + +// Count returns the value at the time the snapshot was taken. +func (c CounterFloat64Snapshot) Count() float64 { return float64(c) } + +// CounterFloat64 holds a float64 value that can be incremented and decremented. +type CounterFloat64 atomic.Uint64 + +// Clear sets the counter to zero. +func (c *CounterFloat64) Clear() { + (*atomic.Uint64)(c).Store(0) +} + +// Dec decrements the counter by the given amount. +func (c *CounterFloat64) Dec(v float64) { + atomicAddFloat((*atomic.Uint64)(c), -v) +} + +// Inc increments the counter by the given amount. +func (c *CounterFloat64) Inc(v float64) { + atomicAddFloat((*atomic.Uint64)(c), v) +} + +// Snapshot returns a read-only copy of the counter. +func (c *CounterFloat64) Snapshot() CounterFloat64Snapshot { + return CounterFloat64Snapshot(math.Float64frombits((*atomic.Uint64)(c).Load())) +} + +func atomicAddFloat(fbits *atomic.Uint64, v float64) { + for { + loadedBits := fbits.Load() + newBits := math.Float64bits(math.Float64frombits(loadedBits) + v) + if fbits.CompareAndSwap(loadedBits, newBits) { + break + } + } +} diff --git a/metrics/counter_float_64_test.go b/metrics/counter_float_64_test.go new file mode 100644 index 000000000000..618cbbbc2b08 --- /dev/null +++ b/metrics/counter_float_64_test.go @@ -0,0 +1,73 @@ +package metrics + +import ( + "sync" + "testing" +) + +func BenchmarkCounterFloat64(b *testing.B) { + c := NewCounterFloat64() + b.ResetTimer() + for i := 0; i < b.N; i++ { + c.Inc(1.0) + } +} + +func BenchmarkCounterFloat64Parallel(b *testing.B) { + c := NewCounterFloat64() + b.ResetTimer() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + c.Inc(1.0) + } + wg.Done() + }() + } + wg.Wait() + if have, want := c.Snapshot().Count(), 10.0*float64(b.N); have != want { + b.Fatalf("have %f want %f", have, want) + } +} + +func TestCounterFloat64(t *testing.T) { + c := NewCounterFloat64() + if count := c.Snapshot().Count(); count != 0 { + t.Errorf("wrong count: %v", count) + } + c.Dec(1.0) + if count := c.Snapshot().Count(); count != -1.0 { + t.Errorf("wrong count: %v", count) + } + snapshot := c.Snapshot() + c.Dec(2.0) + if count := c.Snapshot().Count(); count != -3.0 { + t.Errorf("wrong count: %v", count) + } + c.Inc(1.0) + if count := c.Snapshot().Count(); count != -2.0 { + t.Errorf("wrong count: %v", count) + } + c.Inc(2.0) + if count := c.Snapshot().Count(); count != 0.0 { + t.Errorf("wrong count: %v", count) + } + if count := snapshot.Count(); count != -1.0 { + t.Errorf("snapshot count wrong: %v", count) + } + c.Inc(1.0) + c.Clear() + if count := c.Snapshot().Count(); count != 0.0 { + t.Errorf("wrong count: %v", count) + } +} + +func TestGetOrRegisterCounterFloat64(t *testing.T) { + r := NewRegistry() + NewRegisteredCounterFloat64("foo", r).Inc(47.0) + if c := GetOrRegisterCounterFloat64("foo", r).Snapshot(); c.Count() != 47.0 { + t.Fatal(c) + } +} diff --git a/metrics/counter_test.go b/metrics/counter_test.go index af26ef1548fe..bf0ca6bae44f 100644 --- a/metrics/counter_test.go +++ b/metrics/counter_test.go @@ -14,40 +14,31 @@ func TestCounterClear(t *testing.T) { c := NewCounter() c.Inc(1) c.Clear() - if count := c.Count(); count != 0 { + if count := c.Snapshot().Count(); count != 0 { t.Errorf("c.Count(): 0 != %v\n", count) } } -func TestCounterDec1(t *testing.T) { +func TestCounter(t *testing.T) { c := NewCounter() + if count := c.Snapshot().Count(); count != 0 { + t.Errorf("wrong count: %v", count) + } c.Dec(1) - if count := c.Count(); count != -1 { - t.Errorf("c.Count(): -1 != %v\n", count) + if count := c.Snapshot().Count(); count != -1 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterDec2(t *testing.T) { - c := NewCounter() c.Dec(2) - if count := c.Count(); count != -2 { - t.Errorf("c.Count(): -2 != %v\n", count) + if count := c.Snapshot().Count(); count != -3 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterInc1(t *testing.T) { - c := NewCounter() c.Inc(1) - if count := c.Count(); count != 1 { - t.Errorf("c.Count(): 1 != %v\n", count) + if count := c.Snapshot().Count(); count != -2 { + t.Errorf("wrong count: %v", count) } -} - -func TestCounterInc2(t *testing.T) { - c := NewCounter() c.Inc(2) - if count := c.Count(); count != 2 { - t.Errorf("c.Count(): 2 != %v\n", count) + if count := c.Snapshot().Count(); count != 0 { + t.Errorf("wrong count: %v", count) } } @@ -61,17 +52,10 @@ func TestCounterSnapshot(t *testing.T) { } } -func TestCounterZero(t *testing.T) { - c := NewCounter() - if count := c.Count(); count != 0 { - t.Errorf("c.Count(): 0 != %v\n", count) - } -} - func TestGetOrRegisterCounter(t *testing.T) { r := NewRegistry() NewRegisteredCounter("foo", r).Inc(47) - if c := GetOrRegisterCounter("foo", r); c.Count() != 47 { + if c := GetOrRegisterCounter("foo", r).Snapshot(); c.Count() != 47 { t.Fatal(c) } } diff --git a/metrics/cpu.go b/metrics/cpu.go new file mode 100644 index 000000000000..3a49cd42493a --- /dev/null +++ b/metrics/cpu.go @@ -0,0 +1,25 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package metrics + +// CPUStats is the system and process CPU stats. +// All values are in seconds. +type CPUStats struct { + GlobalTime float64 // Time spent by the CPU working on all processes + GlobalWait float64 // Time spent by waiting on disk for all processes + LocalTime float64 // Time spent by the CPU working on this process +} diff --git a/metrics/cpu_disabled.go b/metrics/cpu_disabled.go new file mode 100644 index 000000000000..025d97aeb32a --- /dev/null +++ b/metrics/cpu_disabled.go @@ -0,0 +1,24 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build ios || js +// +build ios js + +package metrics + +// ReadCPUStats retrieves the current CPU stats. Internally this uses `gosigar`, +// which is not supported on the platforms in this file. +func ReadCPUStats(stats *CPUStats) {} diff --git a/metrics/cpu_enabled.go b/metrics/cpu_enabled.go new file mode 100644 index 000000000000..efb2234c9990 --- /dev/null +++ b/metrics/cpu_enabled.go @@ -0,0 +1,44 @@ +// Copyright 2020 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build !ios && !js +// +build !ios,!js + +package metrics + +import ( + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/shirou/gopsutil/cpu" +) + +// ReadCPUStats retrieves the current CPU stats. +func ReadCPUStats(stats *CPUStats) { + // passing false to request all cpu times + timeStats, err := cpu.Times(false) + if err != nil { + log.Error("Could not read cpu stats", "err", err) + return + } + if len(timeStats) == 0 { + log.Error("Empty cpu stats") + return + } + // requesting all cpu times will always return an array with only one time stats entry + timeStat := timeStats[0] + stats.GlobalTime = timeStat.User + timeStat.Nice + timeStat.System + stats.GlobalWait = timeStat.Iowait + stats.LocalTime = getProcessCPUTime() +} diff --git a/metrics/cputime_nop.go b/metrics/cputime_nop.go new file mode 100644 index 000000000000..465d88c4d232 --- /dev/null +++ b/metrics/cputime_nop.go @@ -0,0 +1,26 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build windows || js +// +build windows js + +package metrics + +// getProcessCPUTime returns 0 on Windows as there is no system call to resolve +// the actual process' CPU time. +func getProcessCPUTime() float64 { + return 0 +} diff --git a/metrics/cputime_unix.go b/metrics/cputime_unix.go new file mode 100644 index 000000000000..a5b50ebaa27f --- /dev/null +++ b/metrics/cputime_unix.go @@ -0,0 +1,36 @@ +// Copyright 2018 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +//go:build !windows && !js +// +build !windows,!js + +package metrics + +import ( + syscall "golang.org/x/sys/unix" + + "github.com/XinFinOrg/XDPoSChain/log" +) + +// getProcessCPUTime retrieves the process' CPU time since program startup. +func getProcessCPUTime() float64 { + var usage syscall.Rusage + if err := syscall.Getrusage(syscall.RUSAGE_SELF, &usage); err != nil { + log.Warn("Failed to retrieve CPU time", "err", err) + return 0 + } + return float64(usage.Utime.Sec+usage.Stime.Sec) + float64(usage.Utime.Usec+usage.Stime.Usec)/1000000 //nolint:unconvert +} diff --git a/metrics/debug.go b/metrics/debug.go index de4a2739fe08..5d0d3992f10b 100644 --- a/metrics/debug.go +++ b/metrics/debug.go @@ -8,29 +8,29 @@ import ( var ( debugMetrics struct { GCStats struct { - LastGC Gauge - NumGC Gauge + LastGC *Gauge + NumGC *Gauge Pause Histogram //PauseQuantiles Histogram - PauseTotal Gauge + PauseTotal *Gauge } - ReadGCStats Timer + ReadGCStats *Timer } gcStats debug.GCStats ) -// Capture new values for the Go garbage collector statistics exported in -// debug.GCStats. This is designed to be called as a goroutine. +// CaptureDebugGCStats captures new values for the Go garbage collector statistics +// exported in debug.GCStats. This is designed to be called as a goroutine. func CaptureDebugGCStats(r Registry, d time.Duration) { for range time.Tick(d) { CaptureDebugGCStatsOnce(r) } } -// Capture new values for the Go garbage collector statistics exported in -// debug.GCStats. This is designed to be called in a background goroutine. -// Giving a registry which has not been given to RegisterDebugGCStats will -// panic. +// CaptureDebugGCStatsOnce captures new values for the Go garbage collector +// statistics exported in debug.GCStats. This is designed to be called in +// a background goroutine. Giving a registry which has not been given to +// RegisterDebugGCStats will panic. // // Be careful (but much less so) with this because debug.ReadGCStats calls // the C function runtime·lock(runtime·mheap) which, while not a stop-the-world @@ -50,9 +50,9 @@ func CaptureDebugGCStatsOnce(r Registry) { debugMetrics.GCStats.PauseTotal.Update(int64(gcStats.PauseTotal)) } -// Register metrics for the Go garbage collector statistics exported in -// debug.GCStats. The metrics are named by their fully-qualified Go symbols, -// i.e. debug.GCStats.PauseTotal. +// RegisterDebugGCStats registers metrics for the Go garbage collector statistics +// exported in debug.GCStats. The metrics are named by their fully-qualified Go +// symbols, i.e. debug.GCStats.PauseTotal. func RegisterDebugGCStats(r Registry) { debugMetrics.GCStats.LastGC = NewGauge() debugMetrics.GCStats.NumGC = NewGauge() diff --git a/metrics/ewma.go b/metrics/ewma.go index 3aecd4fa35a1..194527a79892 100644 --- a/metrics/ewma.go +++ b/metrics/ewma.go @@ -4,115 +4,88 @@ import ( "math" "sync" "sync/atomic" + "time" ) -// EWMAs continuously calculate an exponentially-weighted moving average -// based on an outside source of clock ticks. -type EWMA interface { - Rate() float64 - Snapshot() EWMA - Tick() - Update(int64) -} +// EWMASnapshot is a read-only copy of an EWMA. +type EWMASnapshot float64 + +// Rate returns the rate of events per second at the time the snapshot was +// taken. +func (a EWMASnapshot) Rate() float64 { return float64(a) } // NewEWMA constructs a new EWMA with the given alpha. -func NewEWMA(alpha float64) EWMA { - if !Enabled { - return NilEWMA{} - } - return &StandardEWMA{alpha: alpha} +func NewEWMA(alpha float64) *EWMA { + return &EWMA{alpha: alpha} } // NewEWMA1 constructs a new EWMA for a one-minute moving average. -func NewEWMA1() EWMA { +func NewEWMA1() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/1)) } // NewEWMA5 constructs a new EWMA for a five-minute moving average. -func NewEWMA5() EWMA { +func NewEWMA5() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/5)) } // NewEWMA15 constructs a new EWMA for a fifteen-minute moving average. -func NewEWMA15() EWMA { +func NewEWMA15() *EWMA { return NewEWMA(1 - math.Exp(-5.0/60.0/15)) } -// EWMASnapshot is a read-only copy of another EWMA. -type EWMASnapshot float64 - -// Rate returns the rate of events per second at the time the snapshot was -// taken. -func (a EWMASnapshot) Rate() float64 { return float64(a) } - -// Snapshot returns the snapshot. -func (a EWMASnapshot) Snapshot() EWMA { return a } - -// Tick panics. -func (EWMASnapshot) Tick() { - panic("Tick called on an EWMASnapshot") -} - -// Update panics. -func (EWMASnapshot) Update(int64) { - panic("Update called on an EWMASnapshot") -} - -// NilEWMA is a no-op EWMA. -type NilEWMA struct{} - -// Rate is a no-op. -func (NilEWMA) Rate() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilEWMA) Snapshot() EWMA { return NilEWMA{} } - -// Tick is a no-op. -func (NilEWMA) Tick() {} - -// Update is a no-op. -func (NilEWMA) Update(n int64) {} - -// StandardEWMA is the standard implementation of an EWMA and tracks the number -// of uncounted events and processes them on each tick. It uses the -// sync/atomic package to manage uncounted events. -type StandardEWMA struct { - uncounted int64 // /!\ this should be the first member to ensure 64-bit alignment +// EWMA continuously calculate an exponentially-weighted moving average +// based on an outside source of clock ticks. +type EWMA struct { + uncounted atomic.Int64 alpha float64 - rate float64 - init bool + rate atomic.Uint64 + init atomic.Bool mutex sync.Mutex } -// Rate returns the moving average rate of events per second. -func (a *StandardEWMA) Rate() float64 { - a.mutex.Lock() - defer a.mutex.Unlock() - return a.rate * float64(1e9) -} - // Snapshot returns a read-only copy of the EWMA. -func (a *StandardEWMA) Snapshot() EWMA { - return EWMASnapshot(a.Rate()) +func (a *EWMA) Snapshot() EWMASnapshot { + r := math.Float64frombits(a.rate.Load()) * float64(time.Second) + return EWMASnapshot(r) } -// Tick ticks the clock to update the moving average. It assumes it is called +// tick ticks the clock to update the moving average. It assumes it is called // every five seconds. -func (a *StandardEWMA) Tick() { - count := atomic.LoadInt64(&a.uncounted) - atomic.AddInt64(&a.uncounted, -count) - instantRate := float64(count) / float64(5e9) +func (a *EWMA) tick() { + // Optimization to avoid mutex locking in the hot-path. + if a.init.Load() { + a.updateRate(a.fetchInstantRate()) + return + } + // Slow-path: this is only needed on the first tick() and preserves transactional updating + // of init and rate in the else block. The first conditional is needed below because + // a different thread could have set a.init = 1 between the time of the first atomic load and when + // the lock was acquired. a.mutex.Lock() - defer a.mutex.Unlock() - if a.init { - a.rate += a.alpha * (instantRate - a.rate) + if a.init.Load() { + // The fetchInstantRate() uses atomic loading, which is unnecessary in this critical section + // but again, this section is only invoked on the first successful tick() operation. + a.updateRate(a.fetchInstantRate()) } else { - a.init = true - a.rate = instantRate + a.init.Store(true) + a.rate.Store(math.Float64bits(a.fetchInstantRate())) } + a.mutex.Unlock() +} + +func (a *EWMA) fetchInstantRate() float64 { + count := a.uncounted.Swap(0) + return float64(count) / float64(5*time.Second) +} + +func (a *EWMA) updateRate(instantRate float64) { + currentRate := math.Float64frombits(a.rate.Load()) + currentRate += a.alpha * (instantRate - currentRate) + a.rate.Store(math.Float64bits(currentRate)) } // Update adds n uncounted events. -func (a *StandardEWMA) Update(n int64) { - atomic.AddInt64(&a.uncounted, n) +func (a *EWMA) Update(n int64) { + a.uncounted.Add(n) } diff --git a/metrics/ewma_test.go b/metrics/ewma_test.go index 39e67c605b89..4b9bde3a4b37 100644 --- a/metrics/ewma_test.go +++ b/metrics/ewma_test.go @@ -1,225 +1,89 @@ package metrics -import "testing" +import ( + "math" + "testing" +) + +const epsilon = 0.0000000000000001 func BenchmarkEWMA(b *testing.B) { a := NewEWMA1() b.ResetTimer() for i := 0; i < b.N; i++ { a.Update(1) - a.Tick() + a.tick() } } +func BenchmarkEWMAParallel(b *testing.B) { + a := NewEWMA1() + b.ResetTimer() + + b.RunParallel(func(pb *testing.PB) { + for pb.Next() { + a.Update(1) + a.tick() + } + }) +} + func TestEWMA1(t *testing.T) { a := NewEWMA1() a.Update(3) - a.Tick() - if rate := a.Rate(); rate != 0.6 { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.22072766470286553 { - t.Errorf("1 minute a.Rate(): 0.22072766470286553 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.08120116994196772 { - t.Errorf("2 minute a.Rate(): 0.08120116994196772 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.029872241020718428 { - t.Errorf("3 minute a.Rate(): 0.029872241020718428 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.01098938333324054 { - t.Errorf("4 minute a.Rate(): 0.01098938333324054 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.004042768199451294 { - t.Errorf("5 minute a.Rate(): 0.004042768199451294 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.0014872513059998212 { - t.Errorf("6 minute a.Rate(): 0.0014872513059998212 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.0005471291793327122 { - t.Errorf("7 minute a.Rate(): 0.0005471291793327122 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.00020127757674150815 { - t.Errorf("8 minute a.Rate(): 0.00020127757674150815 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 7.404588245200814e-05 { - t.Errorf("9 minute a.Rate(): 7.404588245200814e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 2.7239957857491083e-05 { - t.Errorf("10 minute a.Rate(): 2.7239957857491083e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 1.0021020474147462e-05 { - t.Errorf("11 minute a.Rate(): 1.0021020474147462e-05 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 3.6865274119969525e-06 { - t.Errorf("12 minute a.Rate(): 3.6865274119969525e-06 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 1.3561976441886433e-06 { - t.Errorf("13 minute a.Rate(): 1.3561976441886433e-06 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 4.989172314621449e-07 { - t.Errorf("14 minute a.Rate(): 4.989172314621449e-07 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 1.8354139230109722e-07 { - t.Errorf("15 minute a.Rate(): 1.8354139230109722e-07 != %v\n", rate) + a.tick() + for i, want := range []float64{0.6, + 0.22072766470286553, 0.08120116994196772, 0.029872241020718428, + 0.01098938333324054, 0.004042768199451294, 0.0014872513059998212, + 0.0005471291793327122, 0.00020127757674150815, 7.404588245200814e-05, + 2.7239957857491083e-05, 1.0021020474147462e-05, 3.6865274119969525e-06, + 1.3561976441886433e-06, 4.989172314621449e-07, 1.8354139230109722e-07, + } { + if rate := a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } func TestEWMA5(t *testing.T) { a := NewEWMA5() a.Update(3) - a.Tick() - if rate := a.Rate(); rate != 0.6 { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.49123845184678905 { - t.Errorf("1 minute a.Rate(): 0.49123845184678905 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.4021920276213837 { - t.Errorf("2 minute a.Rate(): 0.4021920276213837 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.32928698165641596 { - t.Errorf("3 minute a.Rate(): 0.32928698165641596 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.269597378470333 { - t.Errorf("4 minute a.Rate(): 0.269597378470333 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.2207276647028654 { - t.Errorf("5 minute a.Rate(): 0.2207276647028654 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.18071652714732128 { - t.Errorf("6 minute a.Rate(): 0.18071652714732128 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.14795817836496392 { - t.Errorf("7 minute a.Rate(): 0.14795817836496392 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.12113791079679326 { - t.Errorf("8 minute a.Rate(): 0.12113791079679326 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.09917933293295193 { - t.Errorf("9 minute a.Rate(): 0.09917933293295193 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.08120116994196763 { - t.Errorf("10 minute a.Rate(): 0.08120116994196763 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.06648189501740036 { - t.Errorf("11 minute a.Rate(): 0.06648189501740036 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.05443077197364752 { - t.Errorf("12 minute a.Rate(): 0.05443077197364752 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.04456414692860035 { - t.Errorf("13 minute a.Rate(): 0.04456414692860035 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.03648603757513079 { - t.Errorf("14 minute a.Rate(): 0.03648603757513079 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.0298722410207183831020718428 { - t.Errorf("15 minute a.Rate(): 0.0298722410207183831020718428 != %v\n", rate) + a.tick() + for i, want := range []float64{ + 0.6, 0.49123845184678905, 0.4021920276213837, 0.32928698165641596, + 0.269597378470333, 0.2207276647028654, 0.18071652714732128, + 0.14795817836496392, 0.12113791079679326, 0.09917933293295193, + 0.08120116994196763, 0.06648189501740036, 0.05443077197364752, + 0.04456414692860035, 0.03648603757513079, 0.0298722410207183831020718428, + } { + if rate := a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } func TestEWMA15(t *testing.T) { a := NewEWMA15() a.Update(3) - a.Tick() - if rate := a.Rate(); rate != 0.6 { - t.Errorf("initial a.Rate(): 0.6 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.5613041910189706 { - t.Errorf("1 minute a.Rate(): 0.5613041910189706 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.5251039914257684 { - t.Errorf("2 minute a.Rate(): 0.5251039914257684 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.4912384518467888184678905 { - t.Errorf("3 minute a.Rate(): 0.4912384518467888184678905 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.459557003018789 { - t.Errorf("4 minute a.Rate(): 0.459557003018789 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.4299187863442732 { - t.Errorf("5 minute a.Rate(): 0.4299187863442732 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.4021920276213831 { - t.Errorf("6 minute a.Rate(): 0.4021920276213831 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.37625345116383313 { - t.Errorf("7 minute a.Rate(): 0.37625345116383313 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.3519877317060185 { - t.Errorf("8 minute a.Rate(): 0.3519877317060185 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.3292869816564153165641596 { - t.Errorf("9 minute a.Rate(): 0.3292869816564153165641596 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.3080502714195546 { - t.Errorf("10 minute a.Rate(): 0.3080502714195546 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.2881831806538789 { - t.Errorf("11 minute a.Rate(): 0.2881831806538789 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.26959737847033216 { - t.Errorf("12 minute a.Rate(): 0.26959737847033216 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.2522102307052083 { - t.Errorf("13 minute a.Rate(): 0.2522102307052083 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.23594443252115815 { - t.Errorf("14 minute a.Rate(): 0.23594443252115815 != %v\n", rate) - } - elapseMinute(a) - if rate := a.Rate(); rate != 0.2207276647028646247028654470286553 { - t.Errorf("15 minute a.Rate(): 0.2207276647028646247028654470286553 != %v\n", rate) + a.tick() + for i, want := range []float64{ + 0.6, 0.5613041910189706, 0.5251039914257684, 0.4912384518467888184678905, + 0.459557003018789, 0.4299187863442732, 0.4021920276213831, + 0.37625345116383313, 0.3519877317060185, 0.3292869816564153165641596, + 0.3080502714195546, 0.2881831806538789, 0.26959737847033216, + 0.2522102307052083, 0.23594443252115815, 0.2207276647028646247028654470286553, + } { + if rate := a.Snapshot().Rate(); math.Abs(want-rate) > epsilon { + t.Errorf("%d minute a.Snapshot().Rate(): %f != %v\n", i, want, rate) + } + elapseMinute(a) } } -func elapseMinute(a EWMA) { +func elapseMinute(a *EWMA) { for i := 0; i < 12; i++ { - a.Tick() + a.tick() } } diff --git a/metrics/exp/exp.go b/metrics/exp/exp.go index 253b4e94903a..149176989d9f 100644 --- a/metrics/exp/exp.go +++ b/metrics/exp/exp.go @@ -10,6 +10,7 @@ import ( "github.com/XinFinOrg/XDPoSChain/log" "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/prometheus" ) type exp struct { @@ -43,6 +44,7 @@ func Exp(r metrics.Registry) { // http.HandleFunc("/debug/vars", e.expHandler) // haven't found an elegant way, so just use a different endpoint http.Handle("/debug/metrics", h) + http.Handle("/debug/metrics/prometheus", prometheus.Handler(r)) } // ExpHandler will return an expvar powered metrics handler. @@ -56,6 +58,7 @@ func ExpHandler(r metrics.Registry) http.Handler { func Setup(address string) { m := http.NewServeMux() m.Handle("/debug/metrics", ExpHandler(metrics.DefaultRegistry)) + m.Handle("/debug/metrics/prometheus", prometheus.Handler(metrics.DefaultRegistry)) log.Info("Starting metrics server", "addr", fmt.Sprintf("http://%s/debug/metrics", address)) go func() { if err := http.ListenAndServe(address, m); err != nil { @@ -92,19 +95,42 @@ func (exp *exp) getFloat(name string) *expvar.Float { return v } -func (exp *exp) publishCounter(name string, metric metrics.Counter) { +func (exp *exp) getInfo(name string) *expvar.String { + var v *expvar.String + exp.expvarLock.Lock() + p := expvar.Get(name) + if p != nil { + v = p.(*expvar.String) + } else { + v = new(expvar.String) + expvar.Publish(name, v) + } + exp.expvarLock.Unlock() + return v +} + +func (exp *exp) publishCounter(name string, metric metrics.CounterSnapshot) { v := exp.getInt(name) v.Set(metric.Count()) } -func (exp *exp) publishGauge(name string, metric metrics.Gauge) { +func (exp *exp) publishCounterFloat64(name string, metric metrics.CounterFloat64Snapshot) { + v := exp.getFloat(name) + v.Set(metric.Count()) +} + +func (exp *exp) publishGauge(name string, metric metrics.GaugeSnapshot) { v := exp.getInt(name) v.Set(metric.Value()) } -func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64) { +func (exp *exp) publishGaugeFloat64(name string, metric metrics.GaugeFloat64Snapshot) { exp.getFloat(name).Set(metric.Value()) } +func (exp *exp) publishGaugeInfo(name string, metric metrics.GaugeInfoSnapshot) { + exp.getInfo(name).Set(metric.Value().String()) +} + func (exp *exp) publishHistogram(name string, metric metrics.Histogram) { h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -120,7 +146,7 @@ func (exp *exp) publishHistogram(name string, metric metrics.Histogram) { exp.getFloat(name + ".999-percentile").Set(ps[4]) } -func (exp *exp) publishMeter(name string, metric metrics.Meter) { +func (exp *exp) publishMeter(name string, metric *metrics.Meter) { m := metric.Snapshot() exp.getInt(name + ".count").Set(m.Count()) exp.getFloat(name + ".one-minute").Set(m.Rate1()) @@ -129,7 +155,7 @@ func (exp *exp) publishMeter(name string, metric metrics.Meter) { exp.getFloat(name + ".mean").Set(m.RateMean()) } -func (exp *exp) publishTimer(name string, metric metrics.Timer) { +func (exp *exp) publishTimer(name string, metric *metrics.Timer) { t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) exp.getInt(name + ".count").Set(t.Count()) @@ -148,33 +174,37 @@ func (exp *exp) publishTimer(name string, metric metrics.Timer) { exp.getFloat(name + ".mean-rate").Set(t.RateMean()) } -func (exp *exp) publishResettingTimer(name string, metric metrics.ResettingTimer) { +func (exp *exp) publishResettingTimer(name string, metric *metrics.ResettingTimer) { t := metric.Snapshot() - ps := t.Percentiles([]float64{50, 75, 95, 99}) - exp.getInt(name + ".count").Set(int64(len(t.Values()))) + ps := t.Percentiles([]float64{0.50, 0.75, 0.95, 0.99}) + exp.getInt(name + ".count").Set(int64(t.Count())) exp.getFloat(name + ".mean").Set(t.Mean()) - exp.getInt(name + ".50-percentile").Set(ps[0]) - exp.getInt(name + ".75-percentile").Set(ps[1]) - exp.getInt(name + ".95-percentile").Set(ps[2]) - exp.getInt(name + ".99-percentile").Set(ps[3]) + exp.getFloat(name + ".50-percentile").Set(ps[0]) + exp.getFloat(name + ".75-percentile").Set(ps[1]) + exp.getFloat(name + ".95-percentile").Set(ps[2]) + exp.getFloat(name + ".99-percentile").Set(ps[3]) } func (exp *exp) syncToExpvar() { exp.registry.Each(func(name string, i interface{}) { switch i := i.(type) { - case metrics.Counter: - exp.publishCounter(name, i) - case metrics.Gauge: - exp.publishGauge(name, i) - case metrics.GaugeFloat64: - exp.publishGaugeFloat64(name, i) + case *metrics.Counter: + exp.publishCounter(name, i.Snapshot()) + case *metrics.CounterFloat64: + exp.publishCounterFloat64(name, i.Snapshot()) + case *metrics.Gauge: + exp.publishGauge(name, i.Snapshot()) + case *metrics.GaugeFloat64: + exp.publishGaugeFloat64(name, i.Snapshot()) + case *metrics.GaugeInfo: + exp.publishGaugeInfo(name, i.Snapshot()) case metrics.Histogram: exp.publishHistogram(name, i) - case metrics.Meter: + case *metrics.Meter: exp.publishMeter(name, i) - case metrics.Timer: + case *metrics.Timer: exp.publishTimer(name, i) - case metrics.ResettingTimer: + case *metrics.ResettingTimer: exp.publishResettingTimer(name, i) default: panic(fmt.Sprintf("unsupported type for '%s': %T", name, i)) diff --git a/metrics/gauge.go b/metrics/gauge.go index b6b2758b0d13..ba7843e03b27 100644 --- a/metrics/gauge.go +++ b/metrics/gauge.go @@ -2,157 +2,69 @@ package metrics import "sync/atomic" -// Gauges hold an int64 value that can be set arbitrarily. -type Gauge interface { - Snapshot() Gauge - Update(int64) - Dec(int64) - Inc(int64) - Value() int64 -} +// GaugeSnapshot is a read-only copy of a Gauge. +type GaugeSnapshot int64 + +// Value returns the value at the time the snapshot was taken. +func (g GaugeSnapshot) Value() int64 { return int64(g) } // GetOrRegisterGauge returns an existing Gauge or constructs and registers a -// new StandardGauge. -func GetOrRegisterGauge(name string, r Registry) Gauge { - if nil == r { +// new Gauge. +func GetOrRegisterGauge(name string, r Registry) *Gauge { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewGauge).(Gauge) + return r.GetOrRegister(name, NewGauge).(*Gauge) } -// NewGauge constructs a new StandardGauge. -func NewGauge() Gauge { - if !Enabled { - return NilGauge{} - } - return &StandardGauge{0} +// NewGauge constructs a new Gauge. +func NewGauge() *Gauge { + return &Gauge{} } -// NewRegisteredGauge constructs and registers a new StandardGauge. -func NewRegisteredGauge(name string, r Registry) Gauge { +// NewRegisteredGauge constructs and registers a new Gauge. +func NewRegisteredGauge(name string, r Registry) *Gauge { c := NewGauge() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} - -// NewFunctionalGauge constructs a new FunctionalGauge. -func NewFunctionalGauge(f func() int64) Gauge { - if !Enabled { - return NilGauge{} - } - return &FunctionalGauge{value: f} -} - -// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. -func NewRegisteredFunctionalGauge(name string, r Registry, f func() int64) Gauge { - c := NewFunctionalGauge(f) - if nil == r { + if r == nil { r = DefaultRegistry } r.Register(name, c) return c } -// GaugeSnapshot is a read-only copy of another Gauge. -type GaugeSnapshot int64 - -// Snapshot returns the snapshot. -func (g GaugeSnapshot) Snapshot() Gauge { return g } - -// Update panics. -func (GaugeSnapshot) Update(int64) { - panic("Update called on a GaugeSnapshot") -} - -// Dec panics. -func (GaugeSnapshot) Dec(int64) { - panic("Dec called on a GaugeSnapshot") -} - -// Inc panics. -func (GaugeSnapshot) Inc(int64) { - panic("Inc called on a GaugeSnapshot") -} - -// Value returns the value at the time the snapshot was taken. -func (g GaugeSnapshot) Value() int64 { return int64(g) } - -// NilGauge is a no-op Gauge. -type NilGauge struct{} - -// Snapshot is a no-op. -func (NilGauge) Snapshot() Gauge { return NilGauge{} } - -// Update is a no-op. -func (NilGauge) Update(v int64) {} - -// Dec is a no-op. -func (NilGauge) Dec(i int64) {} - -// Inc is a no-op. -func (NilGauge) Inc(i int64) {} - -// Value is a no-op. -func (NilGauge) Value() int64 { return 0 } - -// StandardGauge is the standard implementation of a Gauge and uses the -// sync/atomic package to manage a single int64 value. -type StandardGauge struct { - value int64 -} +// Gauge holds an int64 value that can be set arbitrarily. +type Gauge atomic.Int64 // Snapshot returns a read-only copy of the gauge. -func (g *StandardGauge) Snapshot() Gauge { - return GaugeSnapshot(g.Value()) +func (g *Gauge) Snapshot() GaugeSnapshot { + return GaugeSnapshot((*atomic.Int64)(g).Load()) } // Update updates the gauge's value. -func (g *StandardGauge) Update(v int64) { - atomic.StoreInt64(&g.value, v) -} - -// Value returns the gauge's current value. -func (g *StandardGauge) Value() int64 { - return atomic.LoadInt64(&g.value) +func (g *Gauge) Update(v int64) { + (*atomic.Int64)(g).Store(v) +} + +// UpdateIfGt updates the gauge's value if v is larger then the current value. +func (g *Gauge) UpdateIfGt(v int64) { + value := (*atomic.Int64)(g) + for { + exist := value.Load() + if exist >= v { + break + } + if value.CompareAndSwap(exist, v) { + break + } + } } // Dec decrements the gauge's current value by the given amount. -func (g *StandardGauge) Dec(i int64) { - atomic.AddInt64(&g.value, -i) +func (g *Gauge) Dec(i int64) { + (*atomic.Int64)(g).Add(-i) } // Inc increments the gauge's current value by the given amount. -func (g *StandardGauge) Inc(i int64) { - atomic.AddInt64(&g.value, i) -} - -// FunctionalGauge returns value from given function -type FunctionalGauge struct { - value func() int64 -} - -// Value returns the gauge's current value. -func (g FunctionalGauge) Value() int64 { - return g.value() -} - -// Snapshot returns the snapshot. -func (g FunctionalGauge) Snapshot() Gauge { return GaugeSnapshot(g.Value()) } - -// Update panics. -func (FunctionalGauge) Update(int64) { - panic("Update called on a FunctionalGauge") -} - -// Dec panics. -func (FunctionalGauge) Dec(int64) { - panic("Dec called on a FunctionalGauge") -} - -// Inc panics. -func (FunctionalGauge) Inc(int64) { - panic("Inc called on a FunctionalGauge") +func (g *Gauge) Inc(i int64) { + (*atomic.Int64)(g).Add(i) } diff --git a/metrics/gauge_float64.go b/metrics/gauge_float64.go index 66819c957774..05b401ef9cb7 100644 --- a/metrics/gauge_float64.go +++ b/metrics/gauge_float64.go @@ -1,54 +1,33 @@ package metrics -import "sync" - -// GaugeFloat64s hold a float64 value that can be set arbitrarily. -type GaugeFloat64 interface { - Snapshot() GaugeFloat64 - Update(float64) - Value() float64 -} +import ( + "math" + "sync/atomic" +) // GetOrRegisterGaugeFloat64 returns an existing GaugeFloat64 or constructs and registers a -// new StandardGaugeFloat64. -func GetOrRegisterGaugeFloat64(name string, r Registry) GaugeFloat64 { +// new GaugeFloat64. +func GetOrRegisterGaugeFloat64(name string, r Registry) *GaugeFloat64 { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewGaugeFloat64()).(GaugeFloat64) + return r.GetOrRegister(name, NewGaugeFloat64()).(*GaugeFloat64) } -// NewGaugeFloat64 constructs a new StandardGaugeFloat64. -func NewGaugeFloat64() GaugeFloat64 { - if !Enabled { - return NilGaugeFloat64{} - } - return &StandardGaugeFloat64{ - value: 0.0, - } -} +// GaugeFloat64Snapshot is a read-only copy of a GaugeFloat64. +type GaugeFloat64Snapshot float64 -// NewRegisteredGaugeFloat64 constructs and registers a new StandardGaugeFloat64. -func NewRegisteredGaugeFloat64(name string, r Registry) GaugeFloat64 { - c := NewGaugeFloat64() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c -} +// Value returns the value at the time the snapshot was taken. +func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } -// NewFunctionalGauge constructs a new FunctionalGauge. -func NewFunctionalGaugeFloat64(f func() float64) GaugeFloat64 { - if !Enabled { - return NilGaugeFloat64{} - } - return &FunctionalGaugeFloat64{value: f} +// NewGaugeFloat64 constructs a new GaugeFloat64. +func NewGaugeFloat64() *GaugeFloat64 { + return new(GaugeFloat64) } -// NewRegisteredFunctionalGauge constructs and registers a new StandardGauge. -func NewRegisteredFunctionalGaugeFloat64(name string, r Registry, f func() float64) GaugeFloat64 { - c := NewFunctionalGaugeFloat64(f) +// NewRegisteredGaugeFloat64 constructs and registers a new GaugeFloat64. +func NewRegisteredGaugeFloat64(name string, r Registry) *GaugeFloat64 { + c := NewGaugeFloat64() if nil == r { r = DefaultRegistry } @@ -56,72 +35,16 @@ func NewRegisteredFunctionalGaugeFloat64(name string, r Registry, f func() float return c } -// GaugeFloat64Snapshot is a read-only copy of another GaugeFloat64. -type GaugeFloat64Snapshot float64 - -// Snapshot returns the snapshot. -func (g GaugeFloat64Snapshot) Snapshot() GaugeFloat64 { return g } - -// Update panics. -func (GaugeFloat64Snapshot) Update(float64) { - panic("Update called on a GaugeFloat64Snapshot") -} - -// Value returns the value at the time the snapshot was taken. -func (g GaugeFloat64Snapshot) Value() float64 { return float64(g) } - -// NilGauge is a no-op Gauge. -type NilGaugeFloat64 struct{} - -// Snapshot is a no-op. -func (NilGaugeFloat64) Snapshot() GaugeFloat64 { return NilGaugeFloat64{} } - -// Update is a no-op. -func (NilGaugeFloat64) Update(v float64) {} - -// Value is a no-op. -func (NilGaugeFloat64) Value() float64 { return 0.0 } - -// StandardGaugeFloat64 is the standard implementation of a GaugeFloat64 and uses -// sync.Mutex to manage a single float64 value. -type StandardGaugeFloat64 struct { - mutex sync.Mutex - value float64 -} +// GaugeFloat64 hold a float64 value that can be set arbitrarily. +type GaugeFloat64 atomic.Uint64 // Snapshot returns a read-only copy of the gauge. -func (g *StandardGaugeFloat64) Snapshot() GaugeFloat64 { - return GaugeFloat64Snapshot(g.Value()) +func (g *GaugeFloat64) Snapshot() GaugeFloat64Snapshot { + v := math.Float64frombits((*atomic.Uint64)(g).Load()) + return GaugeFloat64Snapshot(v) } // Update updates the gauge's value. -func (g *StandardGaugeFloat64) Update(v float64) { - g.mutex.Lock() - defer g.mutex.Unlock() - g.value = v -} - -// Value returns the gauge's current value. -func (g *StandardGaugeFloat64) Value() float64 { - g.mutex.Lock() - defer g.mutex.Unlock() - return g.value -} - -// FunctionalGaugeFloat64 returns value from given function -type FunctionalGaugeFloat64 struct { - value func() float64 -} - -// Value returns the gauge's current value. -func (g FunctionalGaugeFloat64) Value() float64 { - return g.value() -} - -// Snapshot returns the snapshot. -func (g FunctionalGaugeFloat64) Snapshot() GaugeFloat64 { return GaugeFloat64Snapshot(g.Value()) } - -// Update panics. -func (FunctionalGaugeFloat64) Update(float64) { - panic("Update called on a FunctionalGaugeFloat64") +func (g *GaugeFloat64) Update(v float64) { + (*atomic.Uint64)(g).Store(math.Float64bits(v)) } diff --git a/metrics/gauge_float64_test.go b/metrics/gauge_float64_test.go index 3ee568e7ba09..194a18821f83 100644 --- a/metrics/gauge_float64_test.go +++ b/metrics/gauge_float64_test.go @@ -1,8 +1,11 @@ package metrics -import "testing" +import ( + "sync" + "testing" +) -func BenchmarkGuageFloat64(b *testing.B) { +func BenchmarkGaugeFloat64(b *testing.B) { g := NewGaugeFloat64() b.ResetTimer() for i := 0; i < b.N; i++ { @@ -10,50 +13,39 @@ func BenchmarkGuageFloat64(b *testing.B) { } } -func TestGaugeFloat64(t *testing.T) { - g := NewGaugeFloat64() - g.Update(float64(47.0)) - if v := g.Value(); float64(47.0) != v { - t.Errorf("g.Value(): 47.0 != %v\n", v) +func BenchmarkGaugeFloat64Parallel(b *testing.B) { + c := NewGaugeFloat64() + var wg sync.WaitGroup + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + c.Update(float64(i)) + } + wg.Done() + }() + } + wg.Wait() + if have, want := c.Snapshot().Value(), float64(b.N-1); have != want { + b.Fatalf("have %f want %f", have, want) } } func TestGaugeFloat64Snapshot(t *testing.T) { g := NewGaugeFloat64() - g.Update(float64(47.0)) + g.Update(47.0) snapshot := g.Snapshot() g.Update(float64(0)) - if v := snapshot.Value(); float64(47.0) != v { + if v := snapshot.Value(); v != 47.0 { t.Errorf("g.Value(): 47.0 != %v\n", v) } } func TestGetOrRegisterGaugeFloat64(t *testing.T) { r := NewRegistry() - NewRegisteredGaugeFloat64("foo", r).Update(float64(47.0)) + NewRegisteredGaugeFloat64("foo", r).Update(47.0) t.Logf("registry: %v", r) - if g := GetOrRegisterGaugeFloat64("foo", r); float64(47.0) != g.Value() { - t.Fatal(g) - } -} - -func TestFunctionalGaugeFloat64(t *testing.T) { - var counter float64 - fg := NewFunctionalGaugeFloat64(func() float64 { - counter++ - return counter - }) - fg.Value() - fg.Value() - if counter != 2 { - t.Error("counter != 2") - } -} - -func TestGetOrRegisterFunctionalGaugeFloat64(t *testing.T) { - r := NewRegistry() - NewRegisteredFunctionalGaugeFloat64("foo", r, func() float64 { return 47 }) - if g := GetOrRegisterGaugeFloat64("foo", r); g.Value() != 47 { + if g := GetOrRegisterGaugeFloat64("foo", r).Snapshot(); g.Value() != 47.0 { t.Fatal(g) } } diff --git a/metrics/gauge_info.go b/metrics/gauge_info.go new file mode 100644 index 000000000000..2f78455649e4 --- /dev/null +++ b/metrics/gauge_info.go @@ -0,0 +1,64 @@ +package metrics + +import ( + "encoding/json" + "sync" +) + +// GaugeInfoValue is a mapping of keys to values +type GaugeInfoValue map[string]string + +func (val GaugeInfoValue) String() string { + data, _ := json.Marshal(val) + return string(data) +} + +// GetOrRegisterGaugeInfo returns an existing GaugeInfo or constructs and registers a +// new GaugeInfo. +func GetOrRegisterGaugeInfo(name string, r Registry) *GaugeInfo { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, NewGaugeInfo()).(*GaugeInfo) +} + +// NewGaugeInfo constructs a new GaugeInfo. +func NewGaugeInfo() *GaugeInfo { + return &GaugeInfo{ + value: GaugeInfoValue{}, + } +} + +// NewRegisteredGaugeInfo constructs and registers a new GaugeInfo. +func NewRegisteredGaugeInfo(name string, r Registry) *GaugeInfo { + c := NewGaugeInfo() + if nil == r { + r = DefaultRegistry + } + r.Register(name, c) + return c +} + +// gaugeInfoSnapshot is a read-only copy of another GaugeInfo. +type GaugeInfoSnapshot GaugeInfoValue + +// Value returns the value at the time the snapshot was taken. +func (g GaugeInfoSnapshot) Value() GaugeInfoValue { return GaugeInfoValue(g) } + +// GaugeInfo maintains a set of key/value mappings. +type GaugeInfo struct { + mutex sync.Mutex + value GaugeInfoValue +} + +// Snapshot returns a read-only copy of the gauge. +func (g *GaugeInfo) Snapshot() GaugeInfoSnapshot { + return GaugeInfoSnapshot(g.value) +} + +// Update updates the gauge's value. +func (g *GaugeInfo) Update(v GaugeInfoValue) { + g.mutex.Lock() + defer g.mutex.Unlock() + g.value = v +} diff --git a/metrics/gauge_info_test.go b/metrics/gauge_info_test.go new file mode 100644 index 000000000000..319afbf92e8f --- /dev/null +++ b/metrics/gauge_info_test.go @@ -0,0 +1,36 @@ +package metrics + +import ( + "testing" +) + +func TestGaugeInfoJsonString(t *testing.T) { + g := NewGaugeInfo() + g.Update(GaugeInfoValue{ + "chain_id": "5", + "anotherKey": "any_string_value", + "third_key": "anything", + }, + ) + want := `{"anotherKey":"any_string_value","chain_id":"5","third_key":"anything"}` + + original := g.Snapshot() + g.Update(GaugeInfoValue{"value": "updated"}) + + if have := original.Value().String(); have != want { + t.Errorf("\nhave: %v\nwant: %v\n", have, want) + } + if have, want := g.Snapshot().Value().String(), `{"value":"updated"}`; have != want { + t.Errorf("\nhave: %v\nwant: %v\n", have, want) + } +} + +func TestGetOrRegisterGaugeInfo(t *testing.T) { + r := NewRegistry() + NewRegisteredGaugeInfo("foo", r).Update( + GaugeInfoValue{"chain_id": "5"}) + g := GetOrRegisterGaugeInfo("foo", r).Snapshot() + if have, want := g.Value().String(), `{"chain_id":"5"}`; have != want { + t.Errorf("have\n%v\nwant\n%v\n", have, want) + } +} diff --git a/metrics/gauge_test.go b/metrics/gauge_test.go index 3aee143455c3..f2ba930bc465 100644 --- a/metrics/gauge_test.go +++ b/metrics/gauge_test.go @@ -1,11 +1,10 @@ package metrics import ( - "fmt" "testing" ) -func BenchmarkGuage(b *testing.B) { +func BenchmarkGauge(b *testing.B) { g := NewGauge() b.ResetTimer() for i := 0; i < b.N; i++ { @@ -13,14 +12,6 @@ func BenchmarkGuage(b *testing.B) { } } -func TestGauge(t *testing.T) { - g := NewGauge() - g.Update(int64(47)) - if v := g.Value(); v != 47 { - t.Errorf("g.Value(): 47 != %v\n", v) - } -} - func TestGaugeSnapshot(t *testing.T) { g := NewGauge() g.Update(int64(47)) @@ -34,35 +25,7 @@ func TestGaugeSnapshot(t *testing.T) { func TestGetOrRegisterGauge(t *testing.T) { r := NewRegistry() NewRegisteredGauge("foo", r).Update(47) - if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { - t.Fatal(g) - } -} - -func TestFunctionalGauge(t *testing.T) { - var counter int64 - fg := NewFunctionalGauge(func() int64 { - counter++ - return counter - }) - fg.Value() - fg.Value() - if counter != 2 { - t.Error("counter != 2") - } -} - -func TestGetOrRegisterFunctionalGauge(t *testing.T) { - r := NewRegistry() - NewRegisteredFunctionalGauge("foo", r, func() int64 { return 47 }) - if g := GetOrRegisterGauge("foo", r); g.Value() != 47 { + if g := GetOrRegisterGauge("foo", r); g.Snapshot().Value() != 47 { t.Fatal(g) } } - -func ExampleGetOrRegisterGauge() { - m := "server.bytes_sent" - g := GetOrRegisterGauge(m, nil) - g.Update(47) - fmt.Println(g.Value()) // Output: 47 -} diff --git a/metrics/graphite.go b/metrics/graphite.go deleted file mode 100644 index 142eec86beb4..000000000000 --- a/metrics/graphite.go +++ /dev/null @@ -1,113 +0,0 @@ -package metrics - -import ( - "bufio" - "fmt" - "log" - "net" - "strconv" - "strings" - "time" -) - -// GraphiteConfig provides a container with configuration parameters for -// the Graphite exporter -type GraphiteConfig struct { - Addr *net.TCPAddr // Network address to connect to - Registry Registry // Registry to be exported - FlushInterval time.Duration // Flush interval - DurationUnit time.Duration // Time conversion unit for durations - Prefix string // Prefix to be prepended to metric names - Percentiles []float64 // Percentiles to export from timers and histograms -} - -// Graphite is a blocking exporter function which reports metrics in r -// to a graphite server located at addr, flushing them every d duration -// and prepending metric names with prefix. -func Graphite(r Registry, d time.Duration, prefix string, addr *net.TCPAddr) { - GraphiteWithConfig(GraphiteConfig{ - Addr: addr, - Registry: r, - FlushInterval: d, - DurationUnit: time.Nanosecond, - Prefix: prefix, - Percentiles: []float64{0.5, 0.75, 0.95, 0.99, 0.999}, - }) -} - -// GraphiteWithConfig is a blocking exporter function just like Graphite, -// but it takes a GraphiteConfig instead. -func GraphiteWithConfig(c GraphiteConfig) { - log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") - for range time.Tick(c.FlushInterval) { - if err := graphite(&c); nil != err { - log.Println(err) - } - } -} - -// GraphiteOnce performs a single submission to Graphite, returning a -// non-nil error on failed connections. This can be used in a loop -// similar to GraphiteWithConfig for custom error handling. -func GraphiteOnce(c GraphiteConfig) error { - log.Printf("WARNING: This go-metrics client has been DEPRECATED! It has been moved to https://github.com/cyberdelia/go-metrics-graphite and will be removed from rcrowley/go-metrics on August 12th 2015") - return graphite(&c) -} - -func graphite(c *GraphiteConfig) error { - now := time.Now().Unix() - du := float64(c.DurationUnit) - conn, err := net.DialTCP("tcp", nil, c.Addr) - if nil != err { - return err - } - defer conn.Close() - w := bufio.NewWriter(conn) - c.Registry.Each(func(name string, i interface{}) { - switch metric := i.(type) { - case Counter: - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, metric.Count(), now) - case Gauge: - fmt.Fprintf(w, "%s.%s.value %d %d\n", c.Prefix, name, metric.Value(), now) - case GaugeFloat64: - fmt.Fprintf(w, "%s.%s.value %f %d\n", c.Prefix, name, metric.Value(), now) - case Histogram: - h := metric.Snapshot() - ps := h.Percentiles(c.Percentiles) - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, h.Count(), now) - fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, h.Min(), now) - fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, h.Max(), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, h.Mean(), now) - fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, h.StdDev(), now) - for psIdx, psKey := range c.Percentiles { - key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) - fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) - } - case Meter: - m := metric.Snapshot() - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, m.Count(), now) - fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, m.Rate1(), now) - fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, m.Rate5(), now) - fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, m.Rate15(), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, m.RateMean(), now) - case Timer: - t := metric.Snapshot() - ps := t.Percentiles(c.Percentiles) - fmt.Fprintf(w, "%s.%s.count %d %d\n", c.Prefix, name, t.Count(), now) - fmt.Fprintf(w, "%s.%s.min %d %d\n", c.Prefix, name, t.Min()/int64(du), now) - fmt.Fprintf(w, "%s.%s.max %d %d\n", c.Prefix, name, t.Max()/int64(du), now) - fmt.Fprintf(w, "%s.%s.mean %.2f %d\n", c.Prefix, name, t.Mean()/du, now) - fmt.Fprintf(w, "%s.%s.std-dev %.2f %d\n", c.Prefix, name, t.StdDev()/du, now) - for psIdx, psKey := range c.Percentiles { - key := strings.Replace(strconv.FormatFloat(psKey*100.0, 'f', -1, 64), ".", "", 1) - fmt.Fprintf(w, "%s.%s.%s-percentile %.2f %d\n", c.Prefix, name, key, ps[psIdx], now) - } - fmt.Fprintf(w, "%s.%s.one-minute %.2f %d\n", c.Prefix, name, t.Rate1(), now) - fmt.Fprintf(w, "%s.%s.five-minute %.2f %d\n", c.Prefix, name, t.Rate5(), now) - fmt.Fprintf(w, "%s.%s.fifteen-minute %.2f %d\n", c.Prefix, name, t.Rate15(), now) - fmt.Fprintf(w, "%s.%s.mean-rate %.2f %d\n", c.Prefix, name, t.RateMean(), now) - } - w.Flush() - }) - return nil -} diff --git a/metrics/graphite_test.go b/metrics/graphite_test.go deleted file mode 100644 index c797c781df6f..000000000000 --- a/metrics/graphite_test.go +++ /dev/null @@ -1,22 +0,0 @@ -package metrics - -import ( - "net" - "time" -) - -func ExampleGraphite() { - addr, _ := net.ResolveTCPAddr("net", ":2003") - go Graphite(DefaultRegistry, 1*time.Second, "some.prefix", addr) -} - -func ExampleGraphiteWithConfig() { - addr, _ := net.ResolveTCPAddr("net", ":2003") - go GraphiteWithConfig(GraphiteConfig{ - Addr: addr, - Registry: DefaultRegistry, - FlushInterval: 1 * time.Second, - DurationUnit: time.Millisecond, - Percentiles: []float64{0.5, 0.75, 0.99, 0.999}, - }) -} diff --git a/metrics/healthcheck.go b/metrics/healthcheck.go index f1ae31e34aee..435e5e0bf93d 100644 --- a/metrics/healthcheck.go +++ b/metrics/healthcheck.go @@ -1,61 +1,35 @@ package metrics -// Healthchecks hold an error value describing an arbitrary up/down status. -type Healthcheck interface { - Check() - Error() error - Healthy() - Unhealthy(error) -} - // NewHealthcheck constructs a new Healthcheck which will use the given // function to update its status. -func NewHealthcheck(f func(Healthcheck)) Healthcheck { - if !Enabled { - return NilHealthcheck{} - } - return &StandardHealthcheck{nil, f} +func NewHealthcheck(f func(*Healthcheck)) *Healthcheck { + return &Healthcheck{nil, f} } -// NilHealthcheck is a no-op. -type NilHealthcheck struct{} - -// Check is a no-op. -func (NilHealthcheck) Check() {} - -// Error is a no-op. -func (NilHealthcheck) Error() error { return nil } - -// Healthy is a no-op. -func (NilHealthcheck) Healthy() {} - -// Unhealthy is a no-op. -func (NilHealthcheck) Unhealthy(error) {} - -// StandardHealthcheck is the standard implementation of a Healthcheck and +// Healthcheck is the standard implementation of a Healthcheck and // stores the status and a function to call to update the status. -type StandardHealthcheck struct { +type Healthcheck struct { err error - f func(Healthcheck) + f func(*Healthcheck) } // Check runs the healthcheck function to update the healthcheck's status. -func (h *StandardHealthcheck) Check() { +func (h *Healthcheck) Check() { h.f(h) } // Error returns the healthcheck's status, which will be nil if it is healthy. -func (h *StandardHealthcheck) Error() error { +func (h *Healthcheck) Error() error { return h.err } // Healthy marks the healthcheck as healthy. -func (h *StandardHealthcheck) Healthy() { +func (h *Healthcheck) Healthy() { h.err = nil } // Unhealthy marks the healthcheck as unhealthy. The error is stored and // may be retrieved by the Error method. -func (h *StandardHealthcheck) Unhealthy(err error) { +func (h *Healthcheck) Unhealthy(err error) { h.err = err } diff --git a/metrics/histogram.go b/metrics/histogram.go index 46f3bbd2f138..7c27bcc92880 100644 --- a/metrics/histogram.go +++ b/metrics/histogram.go @@ -1,22 +1,25 @@ package metrics -// Histograms calculate distribution statistics from a series of int64 values. -type Histogram interface { - Clear() +type HistogramSnapshot interface { Count() int64 Max() int64 Mean() float64 Min() int64 Percentile(float64) float64 Percentiles([]float64) []float64 - Sample() Sample - Snapshot() Histogram + Size() int StdDev() float64 Sum() int64 - Update(int64) Variance() float64 } +// Histogram calculates distribution statistics from a series of int64 values. +type Histogram interface { + Clear() + Update(int64) + Snapshot() HistogramSnapshot +} + // GetOrRegisterHistogram returns an existing Histogram or constructs and // registers a new StandardHistogram. func GetOrRegisterHistogram(name string, r Registry, s Sample) Histogram { @@ -26,12 +29,18 @@ func GetOrRegisterHistogram(name string, r Registry, s Sample) Histogram { return r.GetOrRegister(name, func() Histogram { return NewHistogram(s) }).(Histogram) } +// GetOrRegisterHistogramLazy returns an existing Histogram or constructs and +// registers a new StandardHistogram. +func GetOrRegisterHistogramLazy(name string, r Registry, s func() Sample) Histogram { + if nil == r { + r = DefaultRegistry + } + return r.GetOrRegister(name, func() Histogram { return NewHistogram(s()) }).(Histogram) +} + // NewHistogram constructs a new StandardHistogram from a Sample. func NewHistogram(s Sample) Histogram { - if !Enabled { - return NilHistogram{} - } - return &StandardHistogram{sample: s} + return &StandardHistogram{s} } // NewRegisteredHistogram constructs and registers a new StandardHistogram from @@ -45,109 +54,6 @@ func NewRegisteredHistogram(name string, r Registry, s Sample) Histogram { return c } -// HistogramSnapshot is a read-only copy of another Histogram. -type HistogramSnapshot struct { - sample *SampleSnapshot -} - -// Clear panics. -func (*HistogramSnapshot) Clear() { - panic("Clear called on a HistogramSnapshot") -} - -// Count returns the number of samples recorded at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample at the time the snapshot -// was taken. -func (h *HistogramSnapshot) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample at the time the snapshot was -// taken. -func (h *HistogramSnapshot) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of values in the sample at the -// time the snapshot was taken. -func (h *HistogramSnapshot) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the sample -// at the time the snapshot was taken. -func (h *HistogramSnapshot) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *HistogramSnapshot) Sample() Sample { return h.sample } - -// Snapshot returns the snapshot. -func (h *HistogramSnapshot) Snapshot() Histogram { return h } - -// StdDev returns the standard deviation of the values in the sample at the -// time the snapshot was taken. -func (h *HistogramSnapshot) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample at the time the snapshot was taken. -func (h *HistogramSnapshot) Sum() int64 { return h.sample.Sum() } - -// Update panics. -func (*HistogramSnapshot) Update(int64) { - panic("Update called on a HistogramSnapshot") -} - -// Variance returns the variance of inputs at the time the snapshot was taken. -func (h *HistogramSnapshot) Variance() float64 { return h.sample.Variance() } - -// NilHistogram is a no-op Histogram. -type NilHistogram struct{} - -// Clear is a no-op. -func (NilHistogram) Clear() {} - -// Count is a no-op. -func (NilHistogram) Count() int64 { return 0 } - -// Max is a no-op. -func (NilHistogram) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilHistogram) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilHistogram) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilHistogram) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilHistogram) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Sample is a no-op. -func (NilHistogram) Sample() Sample { return NilSample{} } - -// Snapshot is a no-op. -func (NilHistogram) Snapshot() Histogram { return NilHistogram{} } - -// StdDev is a no-op. -func (NilHistogram) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilHistogram) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilHistogram) Update(v int64) {} - -// Variance is a no-op. -func (NilHistogram) Variance() float64 { return 0.0 } - // StandardHistogram is the standard implementation of a Histogram and uses a // Sample to bound its memory use. type StandardHistogram struct { @@ -157,46 +63,10 @@ type StandardHistogram struct { // Clear clears the histogram and its sample. func (h *StandardHistogram) Clear() { h.sample.Clear() } -// Count returns the number of samples recorded since the histogram was last -// cleared. -func (h *StandardHistogram) Count() int64 { return h.sample.Count() } - -// Max returns the maximum value in the sample. -func (h *StandardHistogram) Max() int64 { return h.sample.Max() } - -// Mean returns the mean of the values in the sample. -func (h *StandardHistogram) Mean() float64 { return h.sample.Mean() } - -// Min returns the minimum value in the sample. -func (h *StandardHistogram) Min() int64 { return h.sample.Min() } - -// Percentile returns an arbitrary percentile of the values in the sample. -func (h *StandardHistogram) Percentile(p float64) float64 { - return h.sample.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. -func (h *StandardHistogram) Percentiles(ps []float64) []float64 { - return h.sample.Percentiles(ps) -} - -// Sample returns the Sample underlying the histogram. -func (h *StandardHistogram) Sample() Sample { return h.sample } - // Snapshot returns a read-only copy of the histogram. -func (h *StandardHistogram) Snapshot() Histogram { - return &HistogramSnapshot{sample: h.sample.Snapshot().(*SampleSnapshot)} +func (h *StandardHistogram) Snapshot() HistogramSnapshot { + return h.sample.Snapshot() } -// StdDev returns the standard deviation of the values in the sample. -func (h *StandardHistogram) StdDev() float64 { return h.sample.StdDev() } - -// Sum returns the sum in the sample. -func (h *StandardHistogram) Sum() int64 { return h.sample.Sum() } - // Update samples a new value. func (h *StandardHistogram) Update(v int64) { h.sample.Update(v) } - -// Variance returns the variance of the values in the sample. -func (h *StandardHistogram) Variance() float64 { return h.sample.Variance() } diff --git a/metrics/histogram_test.go b/metrics/histogram_test.go index 7c9f42fcec96..22fc5468b0b5 100644 --- a/metrics/histogram_test.go +++ b/metrics/histogram_test.go @@ -14,7 +14,7 @@ func TestGetOrRegisterHistogram(t *testing.T) { r := NewRegistry() s := NewUniformSample(100) NewRegisteredHistogram("foo", r, s).Update(47) - if h := GetOrRegisterHistogram("foo", r, s); h.Count() != 1 { + if h := GetOrRegisterHistogram("foo", r, s).Snapshot(); h.Count() != 1 { t.Fatal(h) } } @@ -24,11 +24,11 @@ func TestHistogram10000(t *testing.T) { for i := 1; i <= 10000; i++ { h.Update(int64(i)) } - testHistogram10000(t, h) + testHistogram10000(t, h.Snapshot()) } func TestHistogramEmpty(t *testing.T) { - h := NewHistogram(NewUniformSample(100)) + h := NewHistogram(NewUniformSample(100)).Snapshot() if count := h.Count(); count != 0 { t.Errorf("h.Count(): 0 != %v\n", count) } @@ -66,7 +66,7 @@ func TestHistogramSnapshot(t *testing.T) { testHistogram10000(t, snapshot) } -func testHistogram10000(t *testing.T, h Histogram) { +func testHistogram10000(t *testing.T, h HistogramSnapshot) { if count := h.Count(); count != 10000 { t.Errorf("h.Count(): 10000 != %v\n", count) } diff --git a/metrics/influxdb/influxdb.go b/metrics/influxdb/influxdb.go index 5c00c1a4c320..37a6a3808e3b 100644 --- a/metrics/influxdb/influxdb.go +++ b/metrics/influxdb/influxdb.go @@ -2,229 +2,121 @@ package influxdb import ( "fmt" - "log" - uurl "net/url" - "time" "github.com/XinFinOrg/XDPoSChain/metrics" - "github.com/influxdata/influxdb/client" ) -type reporter struct { - reg metrics.Registry - interval time.Duration - - url uurl.URL - database string - username string - password string - namespace string - tags map[string]string - - client *client.Client - - cache map[string]int64 -} - -// InfluxDB starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval. -func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, password, namespace string) { - InfluxDBWithTags(r, d, url, database, username, password, namespace, nil) -} - -// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags -func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { - u, err := uurl.Parse(url) - if err != nil { - log.Printf("unable to parse InfluxDB url %s. err=%v", url, err) - return - } - - rep := &reporter{ - reg: r, - interval: d, - url: *u, - database: database, - username: username, - password: password, - namespace: namespace, - tags: tags, - cache: make(map[string]int64), - } - if err := rep.makeClient(); err != nil { - log.Printf("unable to make InfluxDB client. err=%v", err) - return - } - - rep.run() -} - -func (r *reporter) makeClient() (err error) { - r.client, err = client.NewClient(client.Config{ - URL: r.url, - Username: r.username, - Password: r.password, - }) - - return -} - -func (r *reporter) run() { - intervalTicker := time.NewTicker(r.interval) - pingTicker := time.NewTicker(time.Second * 5) - - defer intervalTicker.Stop() - defer pingTicker.Stop() - - for { - select { - case <-intervalTicker.C: - if err := r.send(); err != nil { - log.Printf("unable to send to InfluxDB. err=%v", err) - } - case <-pingTicker.C: - _, _, err := r.client.Ping() - if err != nil { - log.Printf("got error while sending a ping to InfluxDB, trying to recreate client. err=%v", err) - - if err = r.makeClient(); err != nil { - log.Printf("unable to make InfluxDB client. err=%v", err) - } - } +func readMeter(namespace, name string, i interface{}) (string, map[string]interface{}) { + switch metric := i.(type) { + case *metrics.Counter: + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Count(), } - } -} - -func (r *reporter) send() error { - var pts []client.Point - - r.reg.Each(func(name string, i interface{}) { - now := time.Now() - namespace := r.namespace - - switch metric := i.(type) { - case metrics.Counter: - v := metric.Count() - l := r.cache[name] - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.count", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": v - l, - }, - Time: now, - }) - r.cache[name] = v - case metrics.Gauge: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": ms.Value(), - }, - Time: now, - }) - case metrics.GaugeFloat64: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.gauge", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "value": ms.Value(), - }, - Time: now, - }) - case metrics.Histogram: - ms := metric.Snapshot() - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.histogram", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - }, - Time: now, - }) - case metrics.Meter: - ms := metric.Snapshot() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.meter", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "mean": ms.RateMean(), - }, - Time: now, - }) - case metrics.Timer: - ms := metric.Snapshot() - ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.timer", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": ms.Count(), - "max": ms.Max(), - "mean": ms.Mean(), - "min": ms.Min(), - "stddev": ms.StdDev(), - "variance": ms.Variance(), - "p50": ps[0], - "p75": ps[1], - "p95": ps[2], - "p99": ps[3], - "p999": ps[4], - "p9999": ps[5], - "m1": ms.Rate1(), - "m5": ms.Rate5(), - "m15": ms.Rate15(), - "meanrate": ms.RateMean(), - }, - Time: now, - }) - case metrics.ResettingTimer: - t := metric.Snapshot() - - if len(t.Values()) > 0 { - ps := t.Percentiles([]float64{50, 95, 99}) - val := t.Values() - pts = append(pts, client.Point{ - Measurement: fmt.Sprintf("%s%s.span", namespace, name), - Tags: r.tags, - Fields: map[string]interface{}{ - "count": len(val), - "max": val[len(val)-1], - "mean": t.Mean(), - "min": val[0], - "p50": ps[0], - "p95": ps[1], - "p99": ps[2], - }, - Time: now, - }) - } + return measurement, fields + case *metrics.CounterFloat64: + measurement := fmt.Sprintf("%s%s.count", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Count(), } - }) - - bps := client.BatchPoints{ - Points: pts, - Database: r.database, + return measurement, fields + case *metrics.Gauge: + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Value(), + } + return measurement, fields + case *metrics.GaugeFloat64: + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": metric.Snapshot().Value(), + } + return measurement, fields + case *metrics.GaugeInfo: + ms := metric.Snapshot() + measurement := fmt.Sprintf("%s%s.gauge", namespace, name) + fields := map[string]interface{}{ + "value": ms.Value().String(), + } + return measurement, fields + case metrics.Histogram: + ms := metric.Snapshot() + if ms.Count() <= 0 { + break + } + ps := ms.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + measurement := fmt.Sprintf("%s%s.histogram", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p25": ps[0], + "p50": ps[1], + "p75": ps[2], + "p95": ps[3], + "p99": ps[4], + "p999": ps[5], + "p9999": ps[6], + } + return measurement, fields + case *metrics.Meter: + ms := metric.Snapshot() + measurement := fmt.Sprintf("%s%s.meter", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "mean": ms.RateMean(), + } + return measurement, fields + case *metrics.Timer: + ms := metric.Snapshot() + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + + measurement := fmt.Sprintf("%s%s.timer", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "stddev": ms.StdDev(), + "variance": ms.Variance(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + "m1": ms.Rate1(), + "m5": ms.Rate5(), + "m15": ms.Rate15(), + "meanrate": ms.RateMean(), + } + return measurement, fields + case *metrics.ResettingTimer: + ms := metric.Snapshot() + if ms.Count() == 0 { + break + } + ps := ms.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + measurement := fmt.Sprintf("%s%s.timer", namespace, name) + fields := map[string]interface{}{ + "count": ms.Count(), + "max": ms.Max(), + "mean": ms.Mean(), + "min": ms.Min(), + "p50": ps[0], + "p75": ps[1], + "p95": ps[2], + "p99": ps[3], + "p999": ps[4], + "p9999": ps[5], + } + return measurement, fields } - - _, err := r.client.Write(bps) - return err + return "", nil } diff --git a/metrics/influxdb/influxdb_test.go b/metrics/influxdb/influxdb_test.go new file mode 100644 index 000000000000..d7e47fda0e08 --- /dev/null +++ b/metrics/influxdb/influxdb_test.go @@ -0,0 +1,123 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package influxdb + +import ( + "fmt" + "io" + "net/http" + "net/http/httptest" + "net/url" + "os" + "runtime" + "strings" + "testing" + + "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/internal" + influxdb2 "github.com/influxdata/influxdb-client-go/v2" +) + +func TestMain(m *testing.M) { + metrics.Enable() + os.Exit(m.Run()) +} + +func TestExampleV1(t *testing.T) { + if runtime.GOARCH == "arm64" { + t.Skip("test skipped on ARM64 due to floating point precision differences") + } + + r := internal.ExampleMetrics() + var have, want string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + haveB, _ := io.ReadAll(r.Body) + have = string(haveB) + r.Body.Close() + })) + defer ts.Close() + u, _ := url.Parse(ts.URL) + rep := &reporter{ + reg: r, + url: *u, + namespace: "goth.", + } + if err := rep.makeClient(); err != nil { + t.Fatal(err) + } + if err := rep.send(978307200); err != nil { + t.Fatal(err) + } + if wantB, err := os.ReadFile("./testdata/influxdbv1.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) + } +} + +func TestExampleV2(t *testing.T) { + if runtime.GOARCH == "arm64" { + t.Skip("test skipped on ARM64 due to floating point precision differences") + } + + r := internal.ExampleMetrics() + var have, want string + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + haveB, _ := io.ReadAll(r.Body) + have = string(haveB) + r.Body.Close() + })) + defer ts.Close() + + rep := &v2Reporter{ + reg: r, + endpoint: ts.URL, + namespace: "goth.", + } + rep.client = influxdb2.NewClient(rep.endpoint, rep.token) + defer rep.client.Close() + rep.write = rep.client.WriteAPI(rep.organization, rep.bucket) + + rep.send(978307200) + + if wantB, err := os.ReadFile("./testdata/influxdbv2.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) + } +} + +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) + } + } + return "" +} diff --git a/metrics/influxdb/influxdbv1.go b/metrics/influxdb/influxdbv1.go new file mode 100644 index 000000000000..7b55872fbad3 --- /dev/null +++ b/metrics/influxdb/influxdbv1.go @@ -0,0 +1,152 @@ +package influxdb + +import ( + "fmt" + uurl "net/url" + "time" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" + client "github.com/influxdata/influxdb1-client/v2" +) + +type reporter struct { + reg metrics.Registry + interval time.Duration + + url uurl.URL + database string + username string + password string + namespace string + tags map[string]string + + client client.Client + + cache map[string]int64 +} + +// InfluxDB starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval. +func InfluxDB(r metrics.Registry, d time.Duration, url, database, username, password, namespace string) { + InfluxDBWithTags(r, d, url, database, username, password, namespace, nil) +} + +// InfluxDBWithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +func InfluxDBWithTags(r metrics.Registry, d time.Duration, url, database, username, password, namespace string, tags map[string]string) { + u, err := uurl.Parse(url) + if err != nil { + log.Warn("Unable to parse InfluxDB", "url", url, "err", err) + return + } + + rep := &reporter{ + reg: r, + interval: d, + url: *u, + database: database, + username: username, + password: password, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + if err := rep.makeClient(); err != nil { + log.Warn("Unable to make InfluxDB client", "err", err) + return + } + + rep.run() +} + +// InfluxDBWithTagsOnce runs once an InfluxDB reporter and post the given metrics.Registry with the specified tags +func InfluxDBWithTagsOnce(r metrics.Registry, url, database, username, password, namespace string, tags map[string]string) error { + u, err := uurl.Parse(url) + if err != nil { + return fmt.Errorf("unable to parse InfluxDB. url: %s, err: %v", url, err) + } + + rep := &reporter{ + reg: r, + url: *u, + database: database, + username: username, + password: password, + namespace: namespace, + tags: tags, + cache: make(map[string]int64), + } + if err := rep.makeClient(); err != nil { + return fmt.Errorf("unable to make InfluxDB client. err: %v", err) + } + + if err := rep.send(0); err != nil { + return fmt.Errorf("unable to send to InfluxDB. err: %v", err) + } + + return nil +} + +func (r *reporter) makeClient() (err error) { + r.client, err = client.NewHTTPClient(client.HTTPConfig{ + Addr: r.url.String(), + Username: r.username, + Password: r.password, + Timeout: 10 * time.Second, + }) + + return +} + +func (r *reporter) run() { + intervalTicker := time.NewTicker(r.interval) + pingTicker := time.NewTicker(time.Second * 5) + + defer intervalTicker.Stop() + defer pingTicker.Stop() + + for { + select { + case <-intervalTicker.C: + if err := r.send(0); err != nil { + log.Warn("Unable to send to InfluxDB", "err", err) + } + case <-pingTicker.C: + _, _, err := r.client.Ping(0) + if err != nil { + log.Warn("Got error while sending a ping to InfluxDB, trying to recreate client", "err", err) + + if err = r.makeClient(); err != nil { + log.Warn("Unable to make InfluxDB client", "err", err) + } + } + } + } +} + +// send sends the measurements. If provided tstamp is >0, it is used. Otherwise, +// a 'fresh' timestamp is used. +func (r *reporter) send(tstamp int64) error { + bps, err := client.NewBatchPoints( + client.BatchPointsConfig{ + Database: r.database, + }) + if err != nil { + return err + } + r.reg.Each(func(name string, i interface{}) { + var now time.Time + if tstamp <= 0 { + now = time.Now() + } else { + now = time.Unix(tstamp, 0) + } + measurement, fields := readMeter(r.namespace, name, i) + if fields == nil { + return + } + if p, err := client.NewPoint(measurement, r.tags, fields, now); err == nil { + bps.AddPoint(p) + } + }) + return r.client.Write(bps) +} diff --git a/metrics/influxdb/influxdbv2.go b/metrics/influxdb/influxdbv2.go new file mode 100644 index 000000000000..5d8ba1b012ac --- /dev/null +++ b/metrics/influxdb/influxdbv2.go @@ -0,0 +1,96 @@ +package influxdb + +import ( + "context" + "time" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" + influxdb2 "github.com/influxdata/influxdb-client-go/v2" + "github.com/influxdata/influxdb-client-go/v2/api" +) + +type v2Reporter struct { + reg metrics.Registry + interval time.Duration + + endpoint string + token string + bucket string + organization string + namespace string + tags map[string]string + + client influxdb2.Client + write api.WriteAPI +} + +// InfluxDBV2WithTags starts a InfluxDB reporter which will post the from the given metrics.Registry at each d interval with the specified tags +func InfluxDBV2WithTags(r metrics.Registry, d time.Duration, endpoint string, token string, bucket string, organization string, namespace string, tags map[string]string) { + rep := &v2Reporter{ + reg: r, + interval: d, + endpoint: endpoint, + token: token, + bucket: bucket, + organization: organization, + namespace: namespace, + tags: tags, + } + + rep.client = influxdb2.NewClient(rep.endpoint, rep.token) + defer rep.client.Close() + + // async write client + rep.write = rep.client.WriteAPI(rep.organization, rep.bucket) + errorsCh := rep.write.Errors() + + // have to handle write errors in a separate goroutine like this b/c the channel is unbuffered and will block writes if not read + go func() { + for err := range errorsCh { + log.Warn("write error", "err", err.Error()) + } + }() + rep.run() +} + +func (r *v2Reporter) run() { + intervalTicker := time.NewTicker(r.interval) + pingTicker := time.NewTicker(time.Second * 5) + + defer intervalTicker.Stop() + defer pingTicker.Stop() + + for { + select { + case <-intervalTicker.C: + r.send(0) + case <-pingTicker.C: + _, err := r.client.Health(context.Background()) + if err != nil { + log.Warn("Got error from influxdb client health check", "err", err.Error()) + } + } + } +} + +// send sends the measurements. If provided tstamp is >0, it is used. Otherwise, +// a 'fresh' timestamp is used. +func (r *v2Reporter) send(tstamp int64) { + r.reg.Each(func(name string, i interface{}) { + var now time.Time + if tstamp <= 0 { + now = time.Now() + } else { + now = time.Unix(tstamp, 0) + } + measurement, fields := readMeter(r.namespace, name, i) + if fields == nil { + return + } + pt := influxdb2.NewPoint(measurement, r.tags, fields, now) + r.write.WritePoint(pt) + }) + // Force all unwritten data to be sent + r.write.Flush() +} diff --git a/metrics/influxdb/testdata/influxdbv1.want b/metrics/influxdb/testdata/influxdbv1.want new file mode 100644 index 000000000000..ded9434c7314 --- /dev/null +++ b/metrics/influxdb/testdata/influxdbv1.want @@ -0,0 +1,11 @@ +goth.system/cpu/schedlatency.histogram count=5645i,max=41943040i,mean=1819544.0410983171,min=0i,p25=0,p50=0,p75=7168,p95=16777216,p99=29360128,p999=33554432,p9999=33554432,stddev=6393570.217198883,variance=40877740122252.57 978307200000000000 +goth.system/memory/pauses.histogram count=14i,max=229376i,mean=50066.28571428572,min=5120i,p25=10240,p50=32768,p75=57344,p95=196608,p99=196608,p999=196608,p9999=196608,stddev=54726.062410783874,variance=2994941906.9890113 978307200000000000 +goth.test/counter.count value=12345 978307200000000000 +goth.test/counter_float64.count value=54321.98 978307200000000000 +goth.test/gauge.gauge value=23456i 978307200000000000 +goth.test/gauge_float64.gauge value=34567.89 978307200000000000 +goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 +goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 +goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 +goth.test/resetting_timer.timer count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000,p75=40500000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000 978307200000000000 +goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/influxdb/testdata/influxdbv2.want b/metrics/influxdb/testdata/influxdbv2.want new file mode 100644 index 000000000000..ded9434c7314 --- /dev/null +++ b/metrics/influxdb/testdata/influxdbv2.want @@ -0,0 +1,11 @@ +goth.system/cpu/schedlatency.histogram count=5645i,max=41943040i,mean=1819544.0410983171,min=0i,p25=0,p50=0,p75=7168,p95=16777216,p99=29360128,p999=33554432,p9999=33554432,stddev=6393570.217198883,variance=40877740122252.57 978307200000000000 +goth.system/memory/pauses.histogram count=14i,max=229376i,mean=50066.28571428572,min=5120i,p25=10240,p50=32768,p75=57344,p95=196608,p99=196608,p999=196608,p9999=196608,stddev=54726.062410783874,variance=2994941906.9890113 978307200000000000 +goth.test/counter.count value=12345 978307200000000000 +goth.test/counter_float64.count value=54321.98 978307200000000000 +goth.test/gauge.gauge value=23456i 978307200000000000 +goth.test/gauge_float64.gauge value=34567.89 978307200000000000 +goth.test/gauge_info.gauge value="{\"arch\":\"amd64\",\"commit\":\"7caa2d8163ae3132c1c2d6978c76610caee2d949\",\"os\":\"linux\",\"protocol_versions\":\"64 65 66\",\"version\":\"1.10.18-unstable\"}" 978307200000000000 +goth.test/histogram.histogram count=3i,max=3i,mean=2,min=1i,p25=1,p50=2,p75=3,p95=3,p99=3,p999=3,p9999=3,stddev=0.816496580927726,variance=0.6666666666666666 978307200000000000 +goth.test/meter.meter count=0i,m1=0,m15=0,m5=0,mean=0 978307200000000000 +goth.test/resetting_timer.timer count=6i,max=120000000i,mean=30000000,min=10000000i,p50=12500000,p75=40500000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000 978307200000000000 +goth.test/timer.timer count=6i,m1=0,m15=0,m5=0,max=120000000i,mean=38333333.333333336,meanrate=0,min=20000000i,p50=22500000,p75=48000000,p95=120000000,p99=120000000,p999=120000000,p9999=120000000,stddev=36545253.529775314,variance=1335555555555555.2 978307200000000000 diff --git a/metrics/init_test.go b/metrics/init_test.go index 43401e833c34..af75bee425b1 100644 --- a/metrics/init_test.go +++ b/metrics/init_test.go @@ -1,5 +1,5 @@ package metrics func init() { - Enabled = true + metricsEnabled = true } diff --git a/metrics/internal/sampledata.go b/metrics/internal/sampledata.go new file mode 100644 index 000000000000..15b52066f16a --- /dev/null +++ b/metrics/internal/sampledata.go @@ -0,0 +1,95 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package internal + +import ( + "bytes" + "encoding/gob" + metrics2 "runtime/metrics" + "time" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +// ExampleMetrics returns an ordered registry populated with a sample of metrics. +func ExampleMetrics() metrics.Registry { + var registry = metrics.NewOrderedRegistry() + + metrics.NewRegisteredCounterFloat64("test/counter", registry).Inc(12345) + metrics.NewRegisteredCounterFloat64("test/counter_float64", registry).Inc(54321.98) + metrics.NewRegisteredGauge("test/gauge", registry).Update(23456) + metrics.NewRegisteredGaugeFloat64("test/gauge_float64", registry).Update(34567.89) + metrics.NewRegisteredGaugeInfo("test/gauge_info", registry).Update( + metrics.GaugeInfoValue{ + "version": "1.10.18-unstable", + "arch": "amd64", + "os": "linux", + "commit": "7caa2d8163ae3132c1c2d6978c76610caee2d949", + "protocol_versions": "64 65 66", + }) + + { + s := metrics.NewUniformSample(3) + s.Update(1) + s.Update(2) + s.Update(3) + //metrics.NewRegisteredHistogram("test/histogram", registry, metrics.NewSampleSnapshot(3, []int64{1, 2, 3})) + metrics.NewRegisteredHistogram("test/histogram", registry, s) + } + registry.Register("test/meter", metrics.NewInactiveMeter()) + { + timer := metrics.NewRegisteredResettingTimer("test/resetting_timer", registry) + timer.Update(10 * time.Millisecond) + timer.Update(11 * time.Millisecond) + timer.Update(12 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(13 * time.Millisecond) + timer.Update(14 * time.Millisecond) + } + { + timer := metrics.NewRegisteredTimer("test/timer", registry) + timer.Update(20 * time.Millisecond) + timer.Update(21 * time.Millisecond) + timer.Update(22 * time.Millisecond) + timer.Update(120 * time.Millisecond) + timer.Update(23 * time.Millisecond) + timer.Update(24 * time.Millisecond) + timer.Stop() + } + registry.Register("test/empty_resetting_timer", metrics.NewResettingTimer().Snapshot()) + + { // go runtime metrics + var sLatency = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06T\xff\x82\x01\xff\xa2\x00\xfe\r\xef\x00\x01\x02\x02\x04\x05\x04\b\x15\x17 B?6.L;$!2) \x1a? \x190aH7FY6#\x190\x1d\x14\x10\x1b\r\t\x04\x03\x01\x01\x00\x03\x02\x00\x03\x05\x05\x02\x02\x06\x04\v\x06\n\x15\x18\x13'&.\x12=H/L&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11ř>\xf8\x84\xf7C֔\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11ũ>\xf8\x84\xf7C֔\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7C֔\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7C֔\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7C֔\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7C֔\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7C֔\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7C֔\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7C֔\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7C֔\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7C֔\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7C֔\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7C֔\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7C֔\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7C֔\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7C֔\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11ř?\xf8\x84\xf7C֔\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11ũ?\xf8\x84\xf7C֔\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\x11Ź?\xf8\x84\xf7C֔\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7C֔\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7C֔\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7C֔\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7C֔\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7C֔\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7C֔\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7C֔\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7C֔\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7C֔\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7C֔\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7C֔\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7C֔\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@\xf8\x84\xf7C֔\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11ř@\xf8\x84\xf7C֔\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11ũ@\xf8\x84\xf7C֔\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7C֔\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7C֔\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7C֔\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7C֔\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7C֔\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + var gcPauses = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06R\xff\x82\x01\xff\xa2\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x01\x01\x00\x01\x00\x00\x00\x01\x01\x01\x01\x01\x01\x01\x00\x02\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11ř>\xf8\x84\xf7C֔\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11ũ>\xf8\x84\xf7C֔\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7C֔\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7C֔\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7C֔\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7C֔\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7C֔\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7C֔\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7C֔\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7C֔\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7C֔\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7C֔\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7C֔\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7C֔\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7C֔\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7C֔\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11ř?\xf8\x84\xf7C֔\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11ũ?\xf8\x84\xf7C֔\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\x11Ź?\xf8\x84\xf7C֔\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7C֔\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7C֔\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7C֔\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7C֔\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7C֔\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7C֔\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7C֔\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7C֔\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7C֔\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7C֔\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7C֔\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7C֔\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@\xf8\x84\xf7C֔\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11ř@\xf8\x84\xf7C֔\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11ũ@\xf8\x84\xf7C֔\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7C֔\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7C֔\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7C֔\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7C֔\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7C֔\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + + var secondsToNs = float64(time.Second) + + dserialize := func(data string) *metrics2.Float64Histogram { + var res metrics2.Float64Histogram + if err := gob.NewDecoder(bytes.NewReader([]byte(data))).Decode(&res); err != nil { + panic(err) + } + return &res + } + cpuSchedLatency := metrics.RuntimeHistogramFromData(secondsToNs, dserialize(sLatency)) + registry.Register("system/cpu/schedlatency", cpuSchedLatency) + + memPauses := metrics.RuntimeHistogramFromData(secondsToNs, dserialize(gcPauses)) + registry.Register("system/memory/pauses", memPauses) + } + return registry +} diff --git a/metrics/internal/sampledata_test.go b/metrics/internal/sampledata_test.go new file mode 100644 index 000000000000..f0c0ce39b286 --- /dev/null +++ b/metrics/internal/sampledata_test.go @@ -0,0 +1,27 @@ +package internal + +import ( + "bytes" + "encoding/gob" + "fmt" + metrics2 "runtime/metrics" + "testing" + "time" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +func TestCollectRuntimeMetrics(t *testing.T) { + t.Skip("Only used for generating testdata") + serialize := func(path string, histogram *metrics2.Float64Histogram) { + var f = new(bytes.Buffer) + if err := gob.NewEncoder(f).Encode(histogram); err != nil { + panic(err) + } + fmt.Printf("var %v = %q\n", path, f.Bytes()) + } + time.Sleep(2 * time.Second) + stats := metrics.ReadRuntimeStats() + serialize("schedlatency", stats.SchedLatency) + serialize("gcpauses", stats.GCPauses) +} diff --git a/metrics/json.go b/metrics/json.go index 2087d8211eb1..6b134d477b60 100644 --- a/metrics/json.go +++ b/metrics/json.go @@ -26,6 +26,6 @@ func WriteJSONOnce(r Registry, w io.Writer) { json.NewEncoder(w).Encode(r) } -func (p *PrefixedRegistry) MarshalJSON() ([]byte, error) { - return json.Marshal(p.GetAll()) +func (r *PrefixedRegistry) MarshalJSON() ([]byte, error) { + return json.Marshal(r.GetAll()) } diff --git a/metrics/librato/client.go b/metrics/librato/client.go deleted file mode 100644 index a807c392af01..000000000000 --- a/metrics/librato/client.go +++ /dev/null @@ -1,102 +0,0 @@ -package librato - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "net/http" -) - -const Operations = "operations" -const OperationsShort = "ops" - -type LibratoClient struct { - Email, Token string -} - -// property strings -const ( - // display attributes - Color = "color" - DisplayMax = "display_max" - DisplayMin = "display_min" - DisplayUnitsLong = "display_units_long" - DisplayUnitsShort = "display_units_short" - DisplayStacked = "display_stacked" - DisplayTransform = "display_transform" - // special gauge display attributes - SummarizeFunction = "summarize_function" - Aggregate = "aggregate" - - // metric keys - Name = "name" - Period = "period" - Description = "description" - DisplayName = "display_name" - Attributes = "attributes" - - // measurement keys - MeasureTime = "measure_time" - Source = "source" - Value = "value" - - // special gauge keys - Count = "count" - Sum = "sum" - Max = "max" - Min = "min" - SumSquares = "sum_squares" - - // batch keys - Counters = "counters" - Gauges = "gauges" - - MetricsPostUrl = "https://metrics-api.librato.com/v1/metrics" -) - -type Measurement map[string]interface{} -type Metric map[string]interface{} - -type Batch struct { - Gauges []Measurement `json:"gauges,omitempty"` - Counters []Measurement `json:"counters,omitempty"` - MeasureTime int64 `json:"measure_time"` - Source string `json:"source"` -} - -func (lc *LibratoClient) PostMetrics(batch Batch) (err error) { - var ( - js []byte - req *http.Request - resp *http.Response - ) - - if len(batch.Counters) == 0 && len(batch.Gauges) == 0 { - return nil - } - - if js, err = json.Marshal(batch); err != nil { - return - } - - if req, err = http.NewRequest("POST", MetricsPostUrl, bytes.NewBuffer(js)); err != nil { - return - } - - req.Header.Set("Content-Type", "application/json") - req.SetBasicAuth(lc.Email, lc.Token) - - if resp, err = http.DefaultClient.Do(req); err != nil { - return - } - - if resp.StatusCode != http.StatusOK { - var body []byte - if body, err = io.ReadAll(resp.Body); err != nil { - body = []byte(fmt.Sprintf("(could not fetch response body for error: %s)", err)) - } - err = fmt.Errorf("unable to post to Librato: %d %s %s", resp.StatusCode, resp.Status, string(body)) - } - return -} diff --git a/metrics/librato/librato.go b/metrics/librato/librato.go deleted file mode 100644 index 45bf6cf297a7..000000000000 --- a/metrics/librato/librato.go +++ /dev/null @@ -1,235 +0,0 @@ -package librato - -import ( - "fmt" - "log" - "math" - "regexp" - "time" - - "github.com/XinFinOrg/XDPoSChain/metrics" -) - -// a regexp for extracting the unit from time.Duration.String -var unitRegexp = regexp.MustCompile(`[^\\d]+$`) - -// a helper that turns a time.Duration into librato display attributes for timer metrics -func translateTimerAttributes(d time.Duration) (attrs map[string]interface{}) { - attrs = make(map[string]interface{}) - attrs[DisplayTransform] = fmt.Sprintf("x/%d", int64(d)) - attrs[DisplayUnitsShort] = string(unitRegexp.Find([]byte(d.String()))) - return -} - -type Reporter struct { - Email, Token string - Namespace string - Source string - Interval time.Duration - Registry metrics.Registry - Percentiles []float64 // percentiles to report on histogram metrics - TimerAttributes map[string]interface{} // units in which timers will be displayed - intervalSec int64 -} - -func NewReporter(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) *Reporter { - return &Reporter{e, t, "", s, d, r, p, translateTimerAttributes(u), int64(d / time.Second)} -} - -func Librato(r metrics.Registry, d time.Duration, e string, t string, s string, p []float64, u time.Duration) { - NewReporter(r, d, e, t, s, p, u).Run() -} - -func (re *Reporter) Run() { - log.Printf("WARNING: This client has been DEPRECATED! It has been moved to https://github.com/mihasya/go-metrics-librato and will be removed from rcrowley/go-metrics on August 5th 2015") - ticker := time.Tick(re.Interval) - metricsApi := &LibratoClient{re.Email, re.Token} - for now := range ticker { - var metrics Batch - var err error - if metrics, err = re.BuildRequest(now, re.Registry); err != nil { - log.Printf("ERROR constructing librato request body %s", err) - continue - } - if err := metricsApi.PostMetrics(metrics); err != nil { - log.Printf("ERROR sending metrics to librato %s", err) - continue - } - } -} - -// calculate sum of squares from data provided by metrics.Histogram -// see http://en.wikipedia.org/wiki/Standard_deviation#Rapid_calculation_methods -func sumSquares(s metrics.Sample) float64 { - count := float64(s.Count()) - sumSquared := math.Pow(count*s.Mean(), 2) - sumSquares := math.Pow(count*s.StdDev(), 2) + sumSquared/count - if math.IsNaN(sumSquares) { - return 0.0 - } - return sumSquares -} -func sumSquaresTimer(t metrics.Timer) float64 { - count := float64(t.Count()) - sumSquared := math.Pow(count*t.Mean(), 2) - sumSquares := math.Pow(count*t.StdDev(), 2) + sumSquared/count - if math.IsNaN(sumSquares) { - return 0.0 - } - return sumSquares -} - -func (re *Reporter) BuildRequest(now time.Time, r metrics.Registry) (snapshot Batch, err error) { - snapshot = Batch{ - // coerce timestamps to a stepping fn so that they line up in Librato graphs - MeasureTime: (now.Unix() / re.intervalSec) * re.intervalSec, - Source: re.Source, - } - snapshot.Gauges = make([]Measurement, 0) - snapshot.Counters = make([]Measurement, 0) - histogramGaugeCount := 1 + len(re.Percentiles) - r.Each(func(name string, metric interface{}) { - if re.Namespace != "" { - name = fmt.Sprintf("%s.%s", re.Namespace, name) - } - measurement := Measurement{} - measurement[Period] = re.Interval.Seconds() - switch m := metric.(type) { - case metrics.Counter: - if m.Count() > 0 { - measurement[Name] = fmt.Sprintf("%s.%s", name, "count") - measurement[Value] = float64(m.Count()) - measurement[Attributes] = map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - } - snapshot.Counters = append(snapshot.Counters, measurement) - } - case metrics.Gauge: - measurement[Name] = name - measurement[Value] = float64(m.Value()) - snapshot.Gauges = append(snapshot.Gauges, measurement) - case metrics.GaugeFloat64: - measurement[Name] = name - measurement[Value] = m.Value() - snapshot.Gauges = append(snapshot.Gauges, measurement) - case metrics.Histogram: - if m.Count() > 0 { - gauges := make([]Measurement, histogramGaugeCount) - s := m.Sample() - measurement[Name] = fmt.Sprintf("%s.%s", name, "hist") - measurement[Count] = uint64(s.Count()) - measurement[Max] = float64(s.Max()) - measurement[Min] = float64(s.Min()) - measurement[Sum] = float64(s.Sum()) - measurement[SumSquares] = sumSquares(s) - gauges[0] = measurement - for i, p := range re.Percentiles { - gauges[i+1] = Measurement{ - Name: fmt.Sprintf("%s.%.2f", measurement[Name], p), - Value: s.Percentile(p), - Period: measurement[Period], - } - } - snapshot.Gauges = append(snapshot.Gauges, gauges...) - } - case metrics.Meter: - measurement[Name] = name - measurement[Value] = float64(m.Count()) - snapshot.Counters = append(snapshot.Counters, measurement) - snapshot.Gauges = append(snapshot.Gauges, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "1min"), - Value: m.Rate1(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "5min"), - Value: m.Rate5(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "15min"), - Value: m.Rate15(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - ) - case metrics.Timer: - measurement[Name] = name - measurement[Value] = float64(m.Count()) - snapshot.Counters = append(snapshot.Counters, measurement) - if m.Count() > 0 { - libratoName := fmt.Sprintf("%s.%s", name, "timer.mean") - gauges := make([]Measurement, histogramGaugeCount) - gauges[0] = Measurement{ - Name: libratoName, - Count: uint64(m.Count()), - Sum: m.Mean() * float64(m.Count()), - Max: float64(m.Max()), - Min: float64(m.Min()), - SumSquares: sumSquaresTimer(m), - Period: int64(re.Interval.Seconds()), - Attributes: re.TimerAttributes, - } - for i, p := range re.Percentiles { - gauges[i+1] = Measurement{ - Name: fmt.Sprintf("%s.timer.%2.0f", name, p*100), - Value: m.Percentile(p), - Period: int64(re.Interval.Seconds()), - Attributes: re.TimerAttributes, - } - } - snapshot.Gauges = append(snapshot.Gauges, gauges...) - snapshot.Gauges = append(snapshot.Gauges, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.1min"), - Value: m.Rate1(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.5min"), - Value: m.Rate5(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - Measurement{ - Name: fmt.Sprintf("%s.%s", name, "rate.15min"), - Value: m.Rate15(), - Period: int64(re.Interval.Seconds()), - Attributes: map[string]interface{}{ - DisplayUnitsLong: Operations, - DisplayUnitsShort: OperationsShort, - DisplayMin: "0", - }, - }, - ) - } - } - }) - return -} diff --git a/metrics/log.go b/metrics/log.go index 0c8ea7c97123..3380bbf9c4d0 100644 --- a/metrics/log.go +++ b/metrics/log.go @@ -21,19 +21,21 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { for range time.Tick(freq) { r.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: + case *Counter: l.Printf("counter %s\n", name) - l.Printf(" count: %9d\n", metric.Count()) - case Gauge: + l.Printf(" count: %9d\n", metric.Snapshot().Count()) + case *CounterFloat64: + l.Printf("counter %s\n", name) + l.Printf(" count: %f\n", metric.Snapshot().Count()) + case *Gauge: + l.Printf("gauge %s\n", name) + l.Printf(" value: %9d\n", metric.Snapshot().Value()) + case *GaugeFloat64: l.Printf("gauge %s\n", name) - l.Printf(" value: %9d\n", metric.Value()) - case GaugeFloat64: + l.Printf(" value: %f\n", metric.Snapshot().Value()) + case *GaugeInfo: l.Printf("gauge %s\n", name) - l.Printf(" value: %f\n", metric.Value()) - case Healthcheck: - metric.Check() - l.Printf("healthcheck %s\n", name) - l.Printf(" error: %v\n", metric.Error()) + l.Printf(" value: %s\n", metric.Snapshot().Value()) case Histogram: h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -48,7 +50,7 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { l.Printf(" 95%%: %12.2f\n", ps[2]) l.Printf(" 99%%: %12.2f\n", ps[3]) l.Printf(" 99.9%%: %12.2f\n", ps[4]) - case Meter: + case *Meter: m := metric.Snapshot() l.Printf("meter %s\n", name) l.Printf(" count: %9d\n", m.Count()) @@ -56,7 +58,7 @@ func LogScaled(r Registry, freq time.Duration, scale time.Duration, l Logger) { l.Printf(" 5-min rate: %12.2f\n", m.Rate5()) l.Printf(" 15-min rate: %12.2f\n", m.Rate15()) l.Printf(" mean rate: %12.2f\n", m.RateMean()) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) l.Printf("timer %s\n", name) diff --git a/metrics/meter.go b/metrics/meter.go index 82b2141a624b..194bd1f30411 100644 --- a/metrics/meter.go +++ b/metrics/meter.go @@ -1,65 +1,46 @@ package metrics import ( + "math" "sync" + "sync/atomic" "time" ) -// Meters count events to produce exponentially-weighted moving average rates -// at one-, five-, and fifteen-minutes and a mean rate. -type Meter interface { - Count() int64 - Mark(int64) - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 - Snapshot() Meter - Stop() -} - // GetOrRegisterMeter returns an existing Meter or constructs and registers a -// new StandardMeter. +// new Meter. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func GetOrRegisterMeter(name string, r Registry) Meter { - if nil == r { +func GetOrRegisterMeter(name string, r Registry) *Meter { + if r == nil { r = DefaultRegistry } - return r.GetOrRegister(name, NewMeter).(Meter) + return r.GetOrRegister(name, NewMeter).(*Meter) } -// NewMeter constructs a new StandardMeter and launches a goroutine. +// NewMeter constructs a new Meter and launches a goroutine. // Be sure to call Stop() once the meter is of no use to allow for garbage collection. -func NewMeter() Meter { - if !Enabled { - return NilMeter{} - } - m := newStandardMeter() - arbiter.Lock() - defer arbiter.Unlock() - arbiter.meters[m] = struct{}{} - if !arbiter.started { - arbiter.started = true - go arbiter.tick() - } +func NewMeter() *Meter { + m := newMeter() + arbiter.add(m) return m } -// NewMeter constructs and registers a new StandardMeter and launches a -// goroutine. +// NewInactiveMeter returns a meter but does not start any goroutines. This +// method is mainly intended for testing. +func NewInactiveMeter() *Meter { + return newMeter() +} + +// NewRegisteredMeter constructs and registers a new Meter +// and launches a goroutine. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func NewRegisteredMeter(name string, r Registry) Meter { - c := NewMeter() - if nil == r { - r = DefaultRegistry - } - r.Register(name, c) - return c +func NewRegisteredMeter(name string, r Registry) *Meter { + return GetOrRegisterMeter(name, r) } -// MeterSnapshot is a read-only copy of another Meter. +// MeterSnapshot is a read-only copy of the meter's internal values. type MeterSnapshot struct { count int64 rate1, rate5, rate15, rateMean float64 @@ -68,11 +49,6 @@ type MeterSnapshot struct { // Count returns the count of events at the time the snapshot was taken. func (m *MeterSnapshot) Count() int64 { return m.count } -// Mark panics. -func (*MeterSnapshot) Mark(n int64) { - panic("Mark called on a MeterSnapshot") -} - // Rate1 returns the one-minute moving average rate of events per second at the // time the snapshot was taken. func (m *MeterSnapshot) Rate1() float64 { return m.rate1 } @@ -89,51 +65,20 @@ func (m *MeterSnapshot) Rate15() float64 { return m.rate15 } // snapshot was taken. func (m *MeterSnapshot) RateMean() float64 { return m.rateMean } -// Snapshot returns the snapshot. -func (m *MeterSnapshot) Snapshot() Meter { return m } - -// Stop is a no-op. -func (m *MeterSnapshot) Stop() {} - -// NilMeter is a no-op Meter. -type NilMeter struct{} - -// Count is a no-op. -func (NilMeter) Count() int64 { return 0 } - -// Mark is a no-op. -func (NilMeter) Mark(n int64) {} - -// Rate1 is a no-op. -func (NilMeter) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilMeter) Rate5() float64 { return 0.0 } - -// Rate15is a no-op. -func (NilMeter) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilMeter) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilMeter) Snapshot() Meter { return NilMeter{} } - -// Stop is a no-op. -func (NilMeter) Stop() {} +// Meter count events to produce exponentially-weighted moving average rates +// at one-, five-, and fifteen-minutes and a mean rate. +type Meter struct { + count atomic.Int64 + uncounted atomic.Int64 // not yet added to the EWMAs + rateMean atomic.Uint64 -// StandardMeter is the standard implementation of a Meter. -type StandardMeter struct { - lock sync.RWMutex - snapshot *MeterSnapshot - a1, a5, a15 EWMA + a1, a5, a15 *EWMA startTime time.Time - stopped bool + stopped atomic.Bool } -func newStandardMeter() *StandardMeter { - return &StandardMeter{ - snapshot: &MeterSnapshot{}, +func newMeter() *Meter { + return &Meter{ a1: NewEWMA1(), a5: NewEWMA5(), a15: NewEWMA15(), @@ -142,120 +87,83 @@ func newStandardMeter() *StandardMeter { } // Stop stops the meter, Mark() will be a no-op if you use it after being stopped. -func (m *StandardMeter) Stop() { - m.lock.Lock() - stopped := m.stopped - m.stopped = true - m.lock.Unlock() - if !stopped { - arbiter.Lock() - delete(arbiter.meters, m) - arbiter.Unlock() +func (m *Meter) Stop() { + if stopped := m.stopped.Swap(true); !stopped { + arbiter.remove(m) } } -// Count returns the number of events recorded. -func (m *StandardMeter) Count() int64 { - m.lock.RLock() - count := m.snapshot.count - m.lock.RUnlock() - return count -} - // Mark records the occurrence of n events. -func (m *StandardMeter) Mark(n int64) { - m.lock.Lock() - defer m.lock.Unlock() - if m.stopped { - return - } - m.snapshot.count += n - m.a1.Update(n) - m.a5.Update(n) - m.a15.Update(n) - m.updateSnapshot() -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (m *StandardMeter) Rate1() float64 { - m.lock.RLock() - rate1 := m.snapshot.rate1 - m.lock.RUnlock() - return rate1 -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (m *StandardMeter) Rate5() float64 { - m.lock.RLock() - rate5 := m.snapshot.rate5 - m.lock.RUnlock() - return rate5 -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (m *StandardMeter) Rate15() float64 { - m.lock.RLock() - rate15 := m.snapshot.rate15 - m.lock.RUnlock() - return rate15 -} - -// RateMean returns the meter's mean rate of events per second. -func (m *StandardMeter) RateMean() float64 { - m.lock.RLock() - rateMean := m.snapshot.rateMean - m.lock.RUnlock() - return rateMean +func (m *Meter) Mark(n int64) { + m.uncounted.Add(n) } // Snapshot returns a read-only copy of the meter. -func (m *StandardMeter) Snapshot() Meter { - m.lock.RLock() - snapshot := *m.snapshot - m.lock.RUnlock() - return &snapshot +func (m *Meter) Snapshot() *MeterSnapshot { + return &MeterSnapshot{ + count: m.count.Load() + m.uncounted.Load(), + rate1: m.a1.Snapshot().Rate(), + rate5: m.a5.Snapshot().Rate(), + rate15: m.a15.Snapshot().Rate(), + rateMean: math.Float64frombits(m.rateMean.Load()), + } } -func (m *StandardMeter) updateSnapshot() { - // should run with write lock held on m.lock - snapshot := m.snapshot - snapshot.rate1 = m.a1.Rate() - snapshot.rate5 = m.a5.Rate() - snapshot.rate15 = m.a15.Rate() - snapshot.rateMean = float64(snapshot.count) / time.Since(m.startTime).Seconds() +func (m *Meter) tick() { + // Take the uncounted values, add to count + n := m.uncounted.Swap(0) + count := m.count.Add(n) + m.rateMean.Store(math.Float64bits(float64(count) / time.Since(m.startTime).Seconds())) + // Update the EWMA's internal state + m.a1.Update(n) + m.a5.Update(n) + m.a15.Update(n) + // And trigger them to calculate the rates + m.a1.tick() + m.a5.tick() + m.a15.tick() } -func (m *StandardMeter) tick() { - m.lock.Lock() - defer m.lock.Unlock() - m.a1.Tick() - m.a5.Tick() - m.a15.Tick() - m.updateSnapshot() -} +var arbiter = meterTicker{meters: make(map[*Meter]struct{})} -// meterArbiter ticks meters every 5s from a single goroutine. +// meterTicker ticks meters every 5s from a single goroutine. // meters are references in a set for future stopping. -type meterArbiter struct { - sync.RWMutex +type meterTicker struct { + mu sync.RWMutex + started bool - meters map[*StandardMeter]struct{} - ticker *time.Ticker + meters map[*Meter]struct{} } -var arbiter = meterArbiter{ticker: time.NewTicker(5e9), meters: make(map[*StandardMeter]struct{})} - -// Ticks meters on the scheduled interval -func (ma *meterArbiter) tick() { - for range ma.ticker.C { - ma.tickMeters() +// add adds another *Meter ot the arbiter, and starts the arbiter ticker. +func (ma *meterTicker) add(m *Meter) { + ma.mu.Lock() + defer ma.mu.Unlock() + ma.meters[m] = struct{}{} + if !ma.started { + ma.started = true + go ma.loop() } } -func (ma *meterArbiter) tickMeters() { - ma.RLock() - defer ma.RUnlock() - for meter := range ma.meters { - meter.tick() +// remove removes a meter from the set of ticked meters. +func (ma *meterTicker) remove(m *Meter) { + ma.mu.Lock() + delete(ma.meters, m) + ma.mu.Unlock() +} + +// loop ticks meters on a 5 second interval. +func (ma *meterTicker) loop() { + ticker := time.NewTicker(5 * time.Second) + for range ticker.C { + if !metricsEnabled { + continue + } + ma.mu.RLock() + for meter := range ma.meters { + meter.tick() + } + ma.mu.RUnlock() } } diff --git a/metrics/meter_test.go b/metrics/meter_test.go index 0dfce76b7af0..d62cf386402c 100644 --- a/metrics/meter_test.go +++ b/metrics/meter_test.go @@ -12,27 +12,29 @@ func BenchmarkMeter(b *testing.B) { m.Mark(1) } } - +func TestMeter(t *testing.T) { + m := NewMeter() + m.Mark(47) + if v := m.Snapshot().Count(); v != 47 { + t.Fatalf("have %d want %d", v, 47) + } +} func TestGetOrRegisterMeter(t *testing.T) { r := NewRegistry() NewRegisteredMeter("foo", r).Mark(47) - if m := GetOrRegisterMeter("foo", r); m.Count() != 47 { - t.Fatal(m) + if m := GetOrRegisterMeter("foo", r).Snapshot(); m.Count() != 47 { + t.Fatal(m.Count()) } } func TestMeterDecay(t *testing.T) { - ma := meterArbiter{ - ticker: time.NewTicker(time.Millisecond), - meters: make(map[*StandardMeter]struct{}), - } - m := newStandardMeter() - ma.meters[m] = struct{}{} - go ma.tick() + m := newMeter() m.Mark(1) - rateMean := m.RateMean() + m.tick() + rateMean := m.Snapshot().RateMean() time.Sleep(100 * time.Millisecond) - if m.RateMean() >= rateMean { + m.tick() + if m.Snapshot().RateMean() >= rateMean { t.Error("m.RateMean() didn't decrease") } } @@ -40,7 +42,7 @@ func TestMeterDecay(t *testing.T) { func TestMeterNonzero(t *testing.T) { m := NewMeter() m.Mark(3) - if count := m.Count(); count != 3 { + if count := m.Snapshot().Count(); count != 3 { t.Errorf("m.Count(): 3 != %v\n", count) } } @@ -57,17 +59,25 @@ func TestMeterStop(t *testing.T) { } } -func TestMeterSnapshot(t *testing.T) { - m := NewMeter() - m.Mark(1) - if snapshot := m.Snapshot(); m.RateMean() != snapshot.RateMean() { - t.Fatal(snapshot) +func TestMeterZero(t *testing.T) { + m := NewMeter().Snapshot() + if count := m.Count(); count != 0 { + t.Errorf("m.Count(): 0 != %v\n", count) } } -func TestMeterZero(t *testing.T) { +func TestMeterRepeat(t *testing.T) { m := NewMeter() - if count := m.Count(); count != 0 { - t.Errorf("m.Count(): 0 != %v\n", count) + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Snapshot().Count(); count != 5050 { + t.Errorf("m.Count(): 5050 != %v\n", count) + } + for i := 0; i < 101; i++ { + m.Mark(int64(i)) + } + if count := m.Snapshot().Count(); count != 10100 { + t.Errorf("m.Count(): 10100 != %v\n", count) } } diff --git a/metrics/metrics.go b/metrics/metrics.go index 2d9d652a0201..a9d6623173ed 100644 --- a/metrics/metrics.go +++ b/metrics/metrics.go @@ -6,79 +6,197 @@ package metrics import ( - "os" - "runtime" - "strings" + "runtime/metrics" + "runtime/pprof" "time" +) - "github.com/XinFinOrg/XDPoSChain/log" +var ( + metricsEnabled = false ) -// Enabled is checked by the constructor functions for all of the -// standard metrics. If it is true, the metric returned is a stub. +// Enabled is checked by functions that are deemed 'expensive', e.g. if a +// meter-type does locking and/or non-trivial math operations during update. +func Enabled() bool { + return metricsEnabled +} + +// Enable enables the metrics system. +// The Enabled-flag is expected to be set, once, during startup, but toggling off and on +// is not supported. // -// This global kill-switch helps quantify the observer effect and makes -// for less cluttered pprof profiles. -var Enabled bool = false - -// MetricsEnabledFlag is the CLI flag name to use to enable metrics collections. -const MetricsEnabledFlag = "metrics" - -// Init enables or disables the metrics system. Since we need this to run before -// any other code gets to create meters and timers, we'll actually do an ugly hack -// and peek into the command line args for the metrics flag. -func init() { - for _, arg := range os.Args { - if flag := strings.TrimLeft(arg, "-"); flag == MetricsEnabledFlag { - log.Info("Enabling metrics collection") - Enabled = true +// Enable is not safe to call concurrently. You need to call this as early as possible in +// the program, before any metrics collection will happen. +func Enable() { + metricsEnabled = true +} + +var threadCreateProfile = pprof.Lookup("threadcreate") + +type runtimeStats struct { + GCPauses *metrics.Float64Histogram + GCAllocBytes uint64 + GCFreedBytes uint64 + + MemTotal uint64 + HeapObjects uint64 + HeapFree uint64 + HeapReleased uint64 + HeapUnused uint64 + + Goroutines uint64 + SchedLatency *metrics.Float64Histogram +} + +var runtimeSamples = []metrics.Sample{ + {Name: "/gc/pauses:seconds"}, // histogram + {Name: "/gc/heap/allocs:bytes"}, + {Name: "/gc/heap/frees:bytes"}, + {Name: "/memory/classes/total:bytes"}, + {Name: "/memory/classes/heap/objects:bytes"}, + {Name: "/memory/classes/heap/free:bytes"}, + {Name: "/memory/classes/heap/released:bytes"}, + {Name: "/memory/classes/heap/unused:bytes"}, + {Name: "/sched/goroutines:goroutines"}, + {Name: "/sched/latencies:seconds"}, // histogram +} + +func ReadRuntimeStats() *runtimeStats { + r := new(runtimeStats) + readRuntimeStats(r) + return r +} + +func readRuntimeStats(v *runtimeStats) { + metrics.Read(runtimeSamples) + for _, s := range runtimeSamples { + // Skip invalid/unknown metrics. This is needed because some metrics + // are unavailable in older Go versions, and attempting to read a 'bad' + // metric panics. + if s.Value.Kind() == metrics.KindBad { + continue + } + + switch s.Name { + case "/gc/pauses:seconds": + v.GCPauses = s.Value.Float64Histogram() + case "/gc/heap/allocs:bytes": + v.GCAllocBytes = s.Value.Uint64() + case "/gc/heap/frees:bytes": + v.GCFreedBytes = s.Value.Uint64() + case "/memory/classes/total:bytes": + v.MemTotal = s.Value.Uint64() + case "/memory/classes/heap/objects:bytes": + v.HeapObjects = s.Value.Uint64() + case "/memory/classes/heap/free:bytes": + v.HeapFree = s.Value.Uint64() + case "/memory/classes/heap/released:bytes": + v.HeapReleased = s.Value.Uint64() + case "/memory/classes/heap/unused:bytes": + v.HeapUnused = s.Value.Uint64() + case "/sched/goroutines:goroutines": + v.Goroutines = s.Value.Uint64() + case "/sched/latencies:seconds": + v.SchedLatency = s.Value.Float64Histogram() } } } -// CollectProcessMetrics periodically collects various metrics about the running -// process. +// CollectProcessMetrics periodically collects various metrics about the running process. func CollectProcessMetrics(refresh time.Duration) { // Short circuit if the metrics system is disabled - if !Enabled { + if !metricsEnabled { return } + // Create the various data collectors - memstats := make([]*runtime.MemStats, 2) - diskstats := make([]*DiskStats, 2) - for i := 0; i < len(memstats); i++ { - memstats[i] = new(runtime.MemStats) - diskstats[i] = new(DiskStats) - } + var ( + cpustats = make([]CPUStats, 2) + diskstats = make([]DiskStats, 2) + rstats = make([]runtimeStats, 2) + ) + + // This scale factor is used for the runtime's time metrics. It's useful to convert to + // ns here because the runtime gives times in float seconds, but runtimeHistogram can + // only provide integers for the minimum and maximum values. + const secondsToNs = float64(time.Second) + // Define the various metrics to collect - memAllocs := GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) - memFrees := GetOrRegisterMeter("system/memory/frees", DefaultRegistry) - memInuse := GetOrRegisterMeter("system/memory/inuse", DefaultRegistry) - memPauses := GetOrRegisterMeter("system/memory/pauses", DefaultRegistry) - - var diskReads, diskReadBytes, diskWrites, diskWriteBytes Meter - if err := ReadDiskStats(diskstats[0]); err == nil { - diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) - diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) - diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) - diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) - } else { - log.Debug("Failed to read disk metrics", "err", err) - } - // Iterate loading the different stats and updating the meters - for i := 1; ; i++ { - runtime.ReadMemStats(memstats[i%2]) - memAllocs.Mark(int64(memstats[i%2].Mallocs - memstats[(i-1)%2].Mallocs)) - memFrees.Mark(int64(memstats[i%2].Frees - memstats[(i-1)%2].Frees)) - memInuse.Mark(int64(memstats[i%2].Alloc - memstats[(i-1)%2].Alloc)) - memPauses.Mark(int64(memstats[i%2].PauseTotalNs - memstats[(i-1)%2].PauseTotalNs)) - - if ReadDiskStats(diskstats[i%2]) == nil { - diskReads.Mark(diskstats[i%2].ReadCount - diskstats[(i-1)%2].ReadCount) - diskReadBytes.Mark(diskstats[i%2].ReadBytes - diskstats[(i-1)%2].ReadBytes) - diskWrites.Mark(diskstats[i%2].WriteCount - diskstats[(i-1)%2].WriteCount) - diskWriteBytes.Mark(diskstats[i%2].WriteBytes - diskstats[(i-1)%2].WriteBytes) + var ( + cpuSysLoad = GetOrRegisterGauge("system/cpu/sysload", DefaultRegistry) + cpuSysWait = GetOrRegisterGauge("system/cpu/syswait", DefaultRegistry) + cpuProcLoad = GetOrRegisterGauge("system/cpu/procload", DefaultRegistry) + cpuSysLoadTotal = GetOrRegisterCounterFloat64("system/cpu/sysload/total", DefaultRegistry) + cpuSysWaitTotal = GetOrRegisterCounterFloat64("system/cpu/syswait/total", DefaultRegistry) + cpuProcLoadTotal = GetOrRegisterCounterFloat64("system/cpu/procload/total", DefaultRegistry) + cpuThreads = GetOrRegisterGauge("system/cpu/threads", DefaultRegistry) + cpuGoroutines = GetOrRegisterGauge("system/cpu/goroutines", DefaultRegistry) + cpuSchedLatency = getOrRegisterRuntimeHistogram("system/cpu/schedlatency", secondsToNs, nil) + memPauses = getOrRegisterRuntimeHistogram("system/memory/pauses", secondsToNs, nil) + memAllocs = GetOrRegisterMeter("system/memory/allocs", DefaultRegistry) + memFrees = GetOrRegisterMeter("system/memory/frees", DefaultRegistry) + memTotal = GetOrRegisterGauge("system/memory/held", DefaultRegistry) + heapUsed = GetOrRegisterGauge("system/memory/used", DefaultRegistry) + heapObjects = GetOrRegisterGauge("system/memory/objects", DefaultRegistry) + diskReads = GetOrRegisterMeter("system/disk/readcount", DefaultRegistry) + diskReadBytes = GetOrRegisterMeter("system/disk/readdata", DefaultRegistry) + diskReadBytesCounter = GetOrRegisterCounter("system/disk/readbytes", DefaultRegistry) + diskWrites = GetOrRegisterMeter("system/disk/writecount", DefaultRegistry) + diskWriteBytes = GetOrRegisterMeter("system/disk/writedata", DefaultRegistry) + diskWriteBytesCounter = GetOrRegisterCounter("system/disk/writebytes", DefaultRegistry) + ) + + var lastCollectTime time.Time + + // Iterate loading the different stats and updating the meters. + now, prev := 0, 1 + for ; ; now, prev = prev, now { + // Gather CPU times. + ReadCPUStats(&cpustats[now]) + collectTime := time.Now() + secondsSinceLastCollect := collectTime.Sub(lastCollectTime).Seconds() + lastCollectTime = collectTime + if secondsSinceLastCollect > 0 { + sysLoad := cpustats[now].GlobalTime - cpustats[prev].GlobalTime + sysWait := cpustats[now].GlobalWait - cpustats[prev].GlobalWait + procLoad := cpustats[now].LocalTime - cpustats[prev].LocalTime + // Convert to integer percentage. + cpuSysLoad.Update(int64(sysLoad / secondsSinceLastCollect * 100)) + cpuSysWait.Update(int64(sysWait / secondsSinceLastCollect * 100)) + cpuProcLoad.Update(int64(procLoad / secondsSinceLastCollect * 100)) + // increment counters (ms) + cpuSysLoadTotal.Inc(sysLoad) + cpuSysWaitTotal.Inc(sysWait) + cpuProcLoadTotal.Inc(procLoad) } + + // Threads + cpuThreads.Update(int64(threadCreateProfile.Count())) + + // Go runtime metrics + readRuntimeStats(&rstats[now]) + + cpuGoroutines.Update(int64(rstats[now].Goroutines)) + cpuSchedLatency.update(rstats[now].SchedLatency) + memPauses.update(rstats[now].GCPauses) + + memAllocs.Mark(int64(rstats[now].GCAllocBytes - rstats[prev].GCAllocBytes)) + memFrees.Mark(int64(rstats[now].GCFreedBytes - rstats[prev].GCFreedBytes)) + + memTotal.Update(int64(rstats[now].MemTotal)) + heapUsed.Update(int64(rstats[now].MemTotal - rstats[now].HeapUnused - rstats[now].HeapFree - rstats[now].HeapReleased)) + heapObjects.Update(int64(rstats[now].HeapObjects)) + + // Disk + if ReadDiskStats(&diskstats[now]) == nil { + diskReads.Mark(diskstats[now].ReadCount - diskstats[prev].ReadCount) + diskReadBytes.Mark(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWrites.Mark(diskstats[now].WriteCount - diskstats[prev].WriteCount) + diskWriteBytes.Mark(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) + diskReadBytesCounter.Inc(diskstats[now].ReadBytes - diskstats[prev].ReadBytes) + diskWriteBytesCounter.Inc(diskstats[now].WriteBytes - diskstats[prev].WriteBytes) + } + time.Sleep(refresh) } } diff --git a/metrics/metrics_test.go b/metrics/metrics_test.go index 029c99870eba..dc144f2425a6 100644 --- a/metrics/metrics_test.go +++ b/metrics/metrics_test.go @@ -2,110 +2,47 @@ package metrics import ( "fmt" - "io" - "log" "sync" "testing" "time" ) -const FANOUT = 128 - -// Stop the compiler from complaining during debugging. -var ( - _ = io.Discard - _ = log.LstdFlags -) +func TestReadRuntimeValues(t *testing.T) { + var v runtimeStats + readRuntimeStats(&v) + t.Logf("%+v", v) +} func BenchmarkMetrics(b *testing.B) { - r := NewRegistry() - c := NewRegisteredCounter("counter", r) - g := NewRegisteredGauge("gauge", r) - gf := NewRegisteredGaugeFloat64("gaugefloat64", r) - h := NewRegisteredHistogram("histogram", r, NewUniformSample(100)) - m := NewRegisteredMeter("meter", r) - t := NewRegisteredTimer("timer", r) + var ( + r = NewRegistry() + c = NewRegisteredCounter("counter", r) + cf = NewRegisteredCounterFloat64("counterfloat64", r) + g = NewRegisteredGauge("gauge", r) + gf = NewRegisteredGaugeFloat64("gaugefloat64", r) + h = NewRegisteredHistogram("histogram", r, NewUniformSample(100)) + m = NewRegisteredMeter("meter", r) + t = NewRegisteredTimer("timer", r) + ) RegisterDebugGCStats(r) - RegisterRuntimeMemStats(r) b.ResetTimer() - ch := make(chan bool) - - wgD := &sync.WaitGroup{} - /* - wgD.Add(1) - go func() { - defer wgD.Done() - //log.Println("go CaptureDebugGCStats") - for { - select { - case <-ch: - //log.Println("done CaptureDebugGCStats") - return - default: - CaptureDebugGCStatsOnce(r) - } - } - }() - //*/ - - wgR := &sync.WaitGroup{} - //* - wgR.Add(1) - go func() { - defer wgR.Done() - //log.Println("go CaptureRuntimeMemStats") - for { - select { - case <-ch: - //log.Println("done CaptureRuntimeMemStats") - return - default: - CaptureRuntimeMemStatsOnce(r) - } - } - }() - //*/ - - wgW := &sync.WaitGroup{} - /* - wgW.Add(1) + var wg sync.WaitGroup + wg.Add(128) + for i := 0; i < 128; i++ { go func() { - defer wgW.Done() - //log.Println("go Write") - for { - select { - case <-ch: - //log.Println("done Write") - return - default: - WriteOnce(r, io.Discard) - } - } - }() - //*/ - - wg := &sync.WaitGroup{} - wg.Add(FANOUT) - for i := 0; i < FANOUT; i++ { - go func(i int) { defer wg.Done() - //log.Println("go", i) for i := 0; i < b.N; i++ { c.Inc(1) + cf.Inc(1.0) g.Update(int64(i)) gf.Update(float64(i)) h.Update(int64(i)) m.Mark(1) t.Update(1) } - //log.Println("done", i) - }(i) + }() } wg.Wait() - close(ch) - wgD.Wait() - wgR.Wait() - wgW.Wait() } func Example() { @@ -118,8 +55,8 @@ func Example() { t.Time(func() { time.Sleep(10 * time.Millisecond) }) t.Update(1) - fmt.Println(c.Count()) - fmt.Println(t.Min()) + fmt.Println(c.Snapshot().Count()) + fmt.Println(t.Snapshot().Min()) // Output: 17 // 1 } diff --git a/metrics/opentsdb.go b/metrics/opentsdb.go index df7f152ed2eb..57af3d025e67 100644 --- a/metrics/opentsdb.go +++ b/metrics/opentsdb.go @@ -3,6 +3,7 @@ package metrics import ( "bufio" "fmt" + "io" "log" "net" "os" @@ -10,7 +11,7 @@ import ( "time" ) -var shortHostName string = "" +var shortHostName = "" // OpenTSDBConfig provides a container with configuration parameters for // the OpenTSDB exporter @@ -57,24 +58,22 @@ func getShortHostname() string { return shortHostName } -func openTSDB(c *OpenTSDBConfig) error { - shortHostname := getShortHostname() - now := time.Now().Unix() +// writeRegistry writes the registry-metrics on the opentsb format. +func (c *OpenTSDBConfig) writeRegistry(w io.Writer, now int64, shortHostname string) { du := float64(c.DurationUnit) - conn, err := net.DialTCP("tcp", nil, c.Addr) - if nil != err { - return err - } - defer conn.Close() - w := bufio.NewWriter(conn) + c.Registry.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: - fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Count(), shortHostname) - case Gauge: - fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) - case GaugeFloat64: - fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Value(), shortHostname) + case *Counter: + fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) + case *CounterFloat64: + fmt.Fprintf(w, "put %s.%s.count %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Count(), shortHostname) + case *Gauge: + fmt.Fprintf(w, "put %s.%s.value %d %d host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) + case *GaugeFloat64: + fmt.Fprintf(w, "put %s.%s.value %d %f host=%s\n", c.Prefix, name, now, metric.Snapshot().Value(), shortHostname) + case *GaugeInfo: + fmt.Fprintf(w, "put %s.%s.value %d %s host=%s\n", c.Prefix, name, now, metric.Snapshot().Value().String(), shortHostname) case Histogram: h := metric.Snapshot() ps := h.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) @@ -88,14 +87,14 @@ func openTSDB(c *OpenTSDBConfig) error { fmt.Fprintf(w, "put %s.%s.95-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[2], shortHostname) fmt.Fprintf(w, "put %s.%s.99-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[3], shortHostname) fmt.Fprintf(w, "put %s.%s.999-percentile %d %.2f host=%s\n", c.Prefix, name, now, ps[4], shortHostname) - case Meter: + case *Meter: m := metric.Snapshot() fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, m.Count(), shortHostname) fmt.Fprintf(w, "put %s.%s.one-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate1(), shortHostname) fmt.Fprintf(w, "put %s.%s.five-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate5(), shortHostname) fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, m.Rate15(), shortHostname) fmt.Fprintf(w, "put %s.%s.mean %d %.2f host=%s\n", c.Prefix, name, now, m.RateMean(), shortHostname) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) fmt.Fprintf(w, "put %s.%s.count %d %d host=%s\n", c.Prefix, name, now, t.Count(), shortHostname) @@ -113,7 +112,17 @@ func openTSDB(c *OpenTSDBConfig) error { fmt.Fprintf(w, "put %s.%s.fifteen-minute %d %.2f host=%s\n", c.Prefix, name, now, t.Rate15(), shortHostname) fmt.Fprintf(w, "put %s.%s.mean-rate %d %.2f host=%s\n", c.Prefix, name, now, t.RateMean(), shortHostname) } - w.Flush() }) +} + +func openTSDB(c *OpenTSDBConfig) error { + conn, err := net.DialTCP("tcp", nil, c.Addr) + if nil != err { + return err + } + defer conn.Close() + w := bufio.NewWriter(conn) + c.writeRegistry(w, time.Now().Unix(), getShortHostname()) + w.Flush() return nil } diff --git a/metrics/opentsdb_test.go b/metrics/opentsdb_test.go index c43728960ed5..4548309f9c23 100644 --- a/metrics/opentsdb_test.go +++ b/metrics/opentsdb_test.go @@ -1,7 +1,11 @@ package metrics import ( + "fmt" "net" + "os" + "strings" + "testing" "time" ) @@ -19,3 +23,44 @@ func ExampleOpenTSDBWithConfig() { DurationUnit: time.Millisecond, }) } + +func TestExampleOpenTSB(t *testing.T) { + r := NewOrderedRegistry() + NewRegisteredGaugeInfo("foo", r).Update(GaugeInfoValue{"chain_id": "5"}) + NewRegisteredGaugeFloat64("pi", r).Update(3.14) + NewRegisteredCounter("months", r).Inc(12) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + NewRegisteredMeter("elite", r).Mark(1337) + NewRegisteredTimer("second", r).Update(time.Second) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + NewRegisteredCounterFloat64("tau", r).Inc(1.57) + + w := new(strings.Builder) + (&OpenTSDBConfig{ + Registry: r, + DurationUnit: time.Millisecond, + Prefix: "pre", + }).writeRegistry(w, 978307200, "hal9000") + + wantB, err := os.ReadFile("./testdata/opentsb.want") + if err != nil { + t.Fatal(err) + } + if have, want := w.String(), string(wantB); have != want { + t.Errorf("\nhave:\n%v\nwant:\n%v\n", have, want) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) + } +} + +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) + } + } + return "" +} diff --git a/metrics/prometheus/collector.go b/metrics/prometheus/collector.go new file mode 100644 index 000000000000..1d49f51dc295 --- /dev/null +++ b/metrics/prometheus/collector.go @@ -0,0 +1,170 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package prometheus + +import ( + "bytes" + "fmt" + "sort" + "strconv" + "strings" + + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +var ( + typeGaugeTpl = "# TYPE %s gauge\n" + typeCounterTpl = "# TYPE %s counter\n" + typeSummaryTpl = "# TYPE %s summary\n" + keyValueTpl = "%s %v\n\n" + keyQuantileTagValueTpl = "%s {quantile=\"%s\"} %v\n" +) + +// collector is a collection of byte buffers that aggregate Prometheus reports +// for different metric types. +type collector struct { + buff *bytes.Buffer +} + +// newCollector creates a new Prometheus metric aggregator. +func newCollector() *collector { + return &collector{ + buff: &bytes.Buffer{}, + } +} + +// Add adds the metric i to the collector. This method returns an error if the +// metric type is not supported/known. +func (c *collector) Add(name string, i any) error { + switch m := i.(type) { + case *metrics.Counter: + c.addCounter(name, m.Snapshot()) + case *metrics.CounterFloat64: + c.addCounterFloat64(name, m.Snapshot()) + case *metrics.Gauge: + c.addGauge(name, m.Snapshot()) + case *metrics.GaugeFloat64: + c.addGaugeFloat64(name, m.Snapshot()) + case *metrics.GaugeInfo: + c.addGaugeInfo(name, m.Snapshot()) + case metrics.Histogram: + c.addHistogram(name, m.Snapshot()) + case *metrics.Meter: + c.addMeter(name, m.Snapshot()) + case *metrics.Timer: + c.addTimer(name, m.Snapshot()) + case *metrics.ResettingTimer: + c.addResettingTimer(name, m.Snapshot()) + default: + return fmt.Errorf("unknown prometheus metric type %T", i) + } + return nil +} + +func (c *collector) addCounter(name string, m metrics.CounterSnapshot) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addCounterFloat64(name string, m metrics.CounterFloat64Snapshot) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addGauge(name string, m metrics.GaugeSnapshot) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addGaugeFloat64(name string, m metrics.GaugeFloat64Snapshot) { + c.writeGaugeCounter(name, m.Value()) +} + +func (c *collector) addGaugeInfo(name string, m metrics.GaugeInfoSnapshot) { + c.writeGaugeInfo(name, m.Value()) +} + +func (c *collector) addHistogram(name string, m metrics.HistogramSnapshot) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } + c.buff.WriteRune('\n') +} + +func (c *collector) addMeter(name string, m *metrics.MeterSnapshot) { + c.writeGaugeCounter(name, m.Count()) +} + +func (c *collector) addTimer(name string, m *metrics.TimerSnapshot) { + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } + c.buff.WriteRune('\n') +} + +func (c *collector) addResettingTimer(name string, m *metrics.ResettingTimerSnapshot) { + if m.Count() <= 0 { + return + } + pv := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + ps := m.Percentiles(pv) + c.writeSummaryCounter(name, m.Count()) + c.buff.WriteString(fmt.Sprintf(typeSummaryTpl, mutateKey(name))) + for i := range pv { + c.writeSummaryPercentile(name, strconv.FormatFloat(pv[i], 'f', -1, 64), ps[i]) + } + c.buff.WriteRune('\n') +} + +func (c *collector) writeGaugeInfo(name string, value metrics.GaugeInfoValue) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) + c.buff.WriteString(name) + c.buff.WriteString(" ") + var kvs []string + for k, v := range value { + kvs = append(kvs, fmt.Sprintf("%v=%q", k, v)) + } + sort.Strings(kvs) + c.buff.WriteString(fmt.Sprintf("{%v} 1\n\n", strings.Join(kvs, ", "))) +} + +func (c *collector) writeGaugeCounter(name string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(typeGaugeTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryCounter(name string, value interface{}) { + name = mutateKey(name + "_count") + c.buff.WriteString(fmt.Sprintf(typeCounterTpl, name)) + c.buff.WriteString(fmt.Sprintf(keyValueTpl, name, value)) +} + +func (c *collector) writeSummaryPercentile(name, p string, value interface{}) { + name = mutateKey(name) + c.buff.WriteString(fmt.Sprintf(keyQuantileTagValueTpl, name, p, value)) +} + +func mutateKey(key string) string { + return strings.ReplaceAll(key, "/", "_") +} diff --git a/metrics/prometheus/collector_test.go b/metrics/prometheus/collector_test.go new file mode 100644 index 000000000000..49b979432fbd --- /dev/null +++ b/metrics/prometheus/collector_test.go @@ -0,0 +1,65 @@ +// Copyright 2023 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +package prometheus + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/XinFinOrg/XDPoSChain/metrics" + "github.com/XinFinOrg/XDPoSChain/metrics/internal" +) + +func TestMain(m *testing.M) { + metrics.Enable() + os.Exit(m.Run()) +} + +func TestCollector(t *testing.T) { + var ( + c = newCollector() + want string + ) + internal.ExampleMetrics().Each(func(name string, i interface{}) { + c.Add(name, i) + }) + if wantB, err := os.ReadFile("./testdata/prometheus.want"); err != nil { + t.Fatal(err) + } else { + want = string(wantB) + } + if have := c.buff.String(); have != want { + t.Logf("have\n%v", have) + t.Logf("have vs want:\n%v", findFirstDiffPos(have, want)) + t.Fatalf("unexpected collector output") + } +} + +func findFirstDiffPos(a, b string) string { + yy := strings.Split(b, "\n") + for i, x := range strings.Split(a, "\n") { + if i >= len(yy) { + return fmt.Sprintf("have:%d: %s\nwant:%d: ", i, x, i) + } + if y := yy[i]; x != y { + return fmt.Sprintf("have:%d: %s\nwant:%d: %s", i, x, i, y) + } + } + return "" +} diff --git a/metrics/prometheus/prometheus.go b/metrics/prometheus/prometheus.go new file mode 100644 index 000000000000..d11db3bae3eb --- /dev/null +++ b/metrics/prometheus/prometheus.go @@ -0,0 +1,52 @@ +// Copyright 2019 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see . + +// Package prometheus exposes go-metrics into a Prometheus format. +package prometheus + +import ( + "fmt" + "net/http" + "sort" + + "github.com/XinFinOrg/XDPoSChain/log" + "github.com/XinFinOrg/XDPoSChain/metrics" +) + +// Handler returns an HTTP handler which dump metrics in Prometheus format. +func Handler(reg metrics.Registry) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Gather and pre-sort the metrics to avoid random listings + var names []string + reg.Each(func(name string, i interface{}) { + names = append(names, name) + }) + sort.Strings(names) + + // Aggregate all the metrics into a Prometheus collector + c := newCollector() + + for _, name := range names { + i := reg.Get(name) + if err := c.Add(name, i); err != nil { + log.Warn("Unknown Prometheus metric type", "type", fmt.Sprintf("%T", i)) + } + } + w.Header().Add("Content-Type", "text/plain") + w.Header().Add("Content-Length", fmt.Sprint(c.buff.Len())) + w.Write(c.buff.Bytes()) + }) +} diff --git a/metrics/prometheus/testdata/prometheus.want b/metrics/prometheus/testdata/prometheus.want new file mode 100644 index 000000000000..a999d83801c6 --- /dev/null +++ b/metrics/prometheus/testdata/prometheus.want @@ -0,0 +1,73 @@ +# TYPE system_cpu_schedlatency_count counter +system_cpu_schedlatency_count 5645 + +# TYPE system_cpu_schedlatency summary +system_cpu_schedlatency {quantile="0.5"} 0 +system_cpu_schedlatency {quantile="0.75"} 7168 +system_cpu_schedlatency {quantile="0.95"} 1.6777216e+07 +system_cpu_schedlatency {quantile="0.99"} 2.9360128e+07 +system_cpu_schedlatency {quantile="0.999"} 3.3554432e+07 +system_cpu_schedlatency {quantile="0.9999"} 3.3554432e+07 + +# TYPE system_memory_pauses_count counter +system_memory_pauses_count 14 + +# TYPE system_memory_pauses summary +system_memory_pauses {quantile="0.5"} 32768 +system_memory_pauses {quantile="0.75"} 57344 +system_memory_pauses {quantile="0.95"} 196608 +system_memory_pauses {quantile="0.99"} 196608 +system_memory_pauses {quantile="0.999"} 196608 +system_memory_pauses {quantile="0.9999"} 196608 + +# TYPE test_counter gauge +test_counter 12345 + +# TYPE test_counter_float64 gauge +test_counter_float64 54321.98 + +# TYPE test_gauge gauge +test_gauge 23456 + +# TYPE test_gauge_float64 gauge +test_gauge_float64 34567.89 + +# TYPE test_gauge_info gauge +test_gauge_info {arch="amd64", commit="7caa2d8163ae3132c1c2d6978c76610caee2d949", os="linux", protocol_versions="64 65 66", version="1.10.18-unstable"} 1 + +# TYPE test_histogram_count counter +test_histogram_count 3 + +# TYPE test_histogram summary +test_histogram {quantile="0.5"} 2 +test_histogram {quantile="0.75"} 3 +test_histogram {quantile="0.95"} 3 +test_histogram {quantile="0.99"} 3 +test_histogram {quantile="0.999"} 3 +test_histogram {quantile="0.9999"} 3 + +# TYPE test_meter gauge +test_meter 0 + +# TYPE test_resetting_timer_count counter +test_resetting_timer_count 6 + +# TYPE test_resetting_timer summary +test_resetting_timer {quantile="0.5"} 1.25e+07 +test_resetting_timer {quantile="0.75"} 4.05e+07 +test_resetting_timer {quantile="0.95"} 1.2e+08 +test_resetting_timer {quantile="0.99"} 1.2e+08 +test_resetting_timer {quantile="0.999"} 1.2e+08 +test_resetting_timer {quantile="0.9999"} 1.2e+08 + +# TYPE test_timer_count counter +test_timer_count 6 + +# TYPE test_timer summary +test_timer {quantile="0.5"} 2.25e+07 +test_timer {quantile="0.75"} 4.8e+07 +test_timer {quantile="0.95"} 1.2e+08 +test_timer {quantile="0.99"} 1.2e+08 +test_timer {quantile="0.999"} 1.2e+08 +test_timer {quantile="0.9999"} 1.2e+08 + diff --git a/metrics/registry.go b/metrics/registry.go index cc34c9dfd2c8..527da6238de7 100644 --- a/metrics/registry.go +++ b/metrics/registry.go @@ -1,29 +1,27 @@ package metrics import ( + "errors" "fmt" "reflect" + "sort" "strings" "sync" ) -// DuplicateMetric is the error returned by Registry.Register when a metric -// already exists. If you mean to Register that metric you must first +// ErrDuplicateMetric is the error returned by Registry.Register when a metric +// already exists. If you mean to Register that metric you must first // Unregister the existing metric. -type DuplicateMetric string - -func (err DuplicateMetric) Error() string { - return fmt.Sprintf("duplicate metric: %s", string(err)) -} +var ErrDuplicateMetric = errors.New("duplicate metric") // A Registry holds references to a set of metrics by name and can iterate // over them, calling callback functions provided by the user. // -// This is an interface so as to encourage other structs to implement +// This is an interface to encourage other structs to implement // the Registry API as appropriate. type Registry interface { - // Call the given function for each registered metric. + // Each call the given function for each registered metric. Each(func(string, interface{})) // Get the metric by the given name or nil if none is registered. @@ -32,7 +30,7 @@ type Registry interface { // GetAll metrics in the Registry. GetAll() map[string]map[string]interface{} - // Gets an existing metric or registers the given one. + // GetOrRegister gets an existing metric or registers the given one. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. GetOrRegister(string, interface{}) interface{} @@ -40,29 +38,47 @@ type Registry interface { // Register the given metric under the given name. Register(string, interface{}) error - // Run all registered healthchecks. + // RunHealthchecks run all registered healthchecks. RunHealthchecks() // Unregister the metric with the given name. Unregister(string) +} - // Unregister all metrics. (Mostly for testing.) - UnregisterAll() +type orderedRegistry struct { + StandardRegistry } -// The standard implementation of a Registry is a mutex-protected map -// of names to metrics. -type StandardRegistry struct { - metrics map[string]interface{} - mutex sync.Mutex +// Each call the given function for each registered metric. +func (r *orderedRegistry) Each(f func(string, interface{})) { + var names []string + reg := r.registered() + for name := range reg { + names = append(names, name) + } + sort.Strings(names) + for _, name := range names { + f(name, reg[name]) + } } -// Create a new registry. +// NewRegistry creates a new registry. func NewRegistry() Registry { - return &StandardRegistry{metrics: make(map[string]interface{})} + return new(StandardRegistry) +} + +// NewOrderedRegistry creates a new ordered registry (for testing). +func NewOrderedRegistry() Registry { + return new(orderedRegistry) } -// Call the given function for each registered metric. +// StandardRegistry the standard implementation of a Registry uses sync.map +// of names to metrics. +type StandardRegistry struct { + metrics sync.Map +} + +// Each call the given function for each registered metric. func (r *StandardRegistry) Each(f func(string, interface{})) { for name, i := range r.registered() { f(name, i) @@ -71,45 +87,57 @@ func (r *StandardRegistry) Each(f func(string, interface{})) { // Get the metric by the given name or nil if none is registered. func (r *StandardRegistry) Get(name string) interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - return r.metrics[name] + item, _ := r.metrics.Load(name) + return item } -// Gets an existing metric or creates and registers a new one. Threadsafe +// GetOrRegister gets an existing metric or creates and registers a new one. Threadsafe // alternative to calling Get and Register on failure. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. func (r *StandardRegistry) GetOrRegister(name string, i interface{}) interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - if metric, ok := r.metrics[name]; ok { - return metric + // fast path + cached, ok := r.metrics.Load(name) + if ok { + return cached } if v := reflect.ValueOf(i); v.Kind() == reflect.Func { i = v.Call(nil)[0].Interface() } - r.register(name, i) - return i + item, _, ok := r.loadOrRegister(name, i) + if !ok { + return i + } + return item } -// Register the given metric under the given name. Returns a DuplicateMetric +// Register the given metric under the given name. Returns a ErrDuplicateMetric // if a metric by the given name is already registered. func (r *StandardRegistry) Register(name string, i interface{}) error { - r.mutex.Lock() - defer r.mutex.Unlock() - return r.register(name, i) + // fast path + _, ok := r.metrics.Load(name) + if ok { + return fmt.Errorf("%w: %v", ErrDuplicateMetric, name) + } + + if v := reflect.ValueOf(i); v.Kind() == reflect.Func { + i = v.Call(nil)[0].Interface() + } + _, loaded, _ := r.loadOrRegister(name, i) + if loaded { + return fmt.Errorf("%w: %v", ErrDuplicateMetric, name) + } + return nil } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func (r *StandardRegistry) RunHealthchecks() { - r.mutex.Lock() - defer r.mutex.Unlock() - for _, i := range r.metrics { - if h, ok := i.(Healthcheck); ok { + r.metrics.Range(func(key, value any) bool { + if h, ok := value.(*Healthcheck); ok { h.Check() } - } + return true + }) } // GetAll metrics in the Registry @@ -118,13 +146,15 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { r.Each(func(name string, i interface{}) { values := make(map[string]interface{}) switch metric := i.(type) { - case Counter: - values["count"] = metric.Count() - case Gauge: - values["value"] = metric.Value() - case GaugeFloat64: - values["value"] = metric.Value() - case Healthcheck: + case *Counter: + values["count"] = metric.Snapshot().Count() + case *CounterFloat64: + values["count"] = metric.Snapshot().Count() + case *Gauge: + values["value"] = metric.Snapshot().Value() + case *GaugeFloat64: + values["value"] = metric.Snapshot().Value() + case *Healthcheck: values["error"] = nil metric.Check() if err := metric.Error(); nil != err { @@ -143,14 +173,14 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { values["95%"] = ps[2] values["99%"] = ps[3] values["99.9%"] = ps[4] - case Meter: + case *Meter: m := metric.Snapshot() values["count"] = m.Count() values["1m.rate"] = m.Rate1() values["5m.rate"] = m.Rate5() values["15m.rate"] = m.Rate15() values["mean.rate"] = m.RateMean() - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) values["count"] = t.Count() @@ -175,45 +205,31 @@ func (r *StandardRegistry) GetAll() map[string]map[string]interface{} { // Unregister the metric with the given name. func (r *StandardRegistry) Unregister(name string) { - r.mutex.Lock() - defer r.mutex.Unlock() r.stop(name) - delete(r.metrics, name) -} - -// Unregister all metrics. (Mostly for testing.) -func (r *StandardRegistry) UnregisterAll() { - r.mutex.Lock() - defer r.mutex.Unlock() - for name := range r.metrics { - r.stop(name) - delete(r.metrics, name) - } + r.metrics.LoadAndDelete(name) } -func (r *StandardRegistry) register(name string, i interface{}) error { - if _, ok := r.metrics[name]; ok { - return DuplicateMetric(name) - } +func (r *StandardRegistry) loadOrRegister(name string, i interface{}) (interface{}, bool, bool) { switch i.(type) { - case Counter, Gauge, GaugeFloat64, Healthcheck, Histogram, Meter, Timer, ResettingTimer: - r.metrics[name] = i + case *Counter, *CounterFloat64, *Gauge, *GaugeFloat64, *GaugeInfo, *Healthcheck, Histogram, *Meter, *Timer, *ResettingTimer: + default: + return nil, false, false } - return nil + item, loaded := r.metrics.LoadOrStore(name, i) + return item, loaded, true } func (r *StandardRegistry) registered() map[string]interface{} { - r.mutex.Lock() - defer r.mutex.Unlock() - metrics := make(map[string]interface{}, len(r.metrics)) - for name, i := range r.metrics { - metrics[name] = i - } + metrics := make(map[string]interface{}) + r.metrics.Range(func(key, value any) bool { + metrics[key.(string)] = value + return true + }) return metrics } func (r *StandardRegistry) stop(name string) { - if i, ok := r.metrics[name]; ok { + if i, ok := r.metrics.Load(name); ok { if s, ok := i.(Stoppable); ok { s.Stop() } @@ -244,7 +260,7 @@ func NewPrefixedChildRegistry(parent Registry, prefix string) Registry { } } -// Call the given function for each registered metric. +// Each call the given function for each registered metric. func (r *PrefixedRegistry) Each(fn func(string, interface{})) { wrappedFn := func(prefix string) func(string, interface{}) { return func(name string, iface interface{}) { @@ -276,7 +292,7 @@ func (r *PrefixedRegistry) Get(name string) interface{} { return r.underlying.Get(realName) } -// Gets an existing metric or registers the given one. +// GetOrRegister gets an existing metric or registers the given one. // The interface can be the metric to register if not found in registry, // or a function returning the metric for lazy instantiation. func (r *PrefixedRegistry) GetOrRegister(name string, metric interface{}) interface{} { @@ -290,7 +306,7 @@ func (r *PrefixedRegistry) Register(name string, metric interface{}) error { return r.underlying.Register(realName, metric) } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func (r *PrefixedRegistry) RunHealthchecks() { r.underlying.RunHealthchecks() } @@ -306,14 +322,11 @@ func (r *PrefixedRegistry) Unregister(name string) { r.underlying.Unregister(realName) } -// Unregister all metrics. (Mostly for testing.) -func (r *PrefixedRegistry) UnregisterAll() { - r.underlying.UnregisterAll() -} - -var DefaultRegistry Registry = NewRegistry() +var ( + DefaultRegistry = NewRegistry() +) -// Call the given function for each registered metric. +// Each call the given function for each registered metric. func Each(f func(string, interface{})) { DefaultRegistry.Each(f) } @@ -323,19 +336,19 @@ func Get(name string) interface{} { return DefaultRegistry.Get(name) } -// Gets an existing metric or creates and registers a new one. Threadsafe +// GetOrRegister gets an existing metric or creates and registers a new one. Threadsafe // alternative to calling Get and Register on failure. func GetOrRegister(name string, i interface{}) interface{} { return DefaultRegistry.GetOrRegister(name, i) } -// Register the given metric under the given name. Returns a DuplicateMetric +// Register the given metric under the given name. Returns a ErrDuplicateMetric // if a metric by the given name is already registered. func Register(name string, i interface{}) error { return DefaultRegistry.Register(name, i) } -// Register the given metric under the given name. Panics if a metric by the +// MustRegister register the given metric under the given name. Panics if a metric by the // given name is already registered. func MustRegister(name string, i interface{}) { if err := Register(name, i); err != nil { @@ -343,7 +356,7 @@ func MustRegister(name string, i interface{}) { } } -// Run all registered healthchecks. +// RunHealthchecks run all registered healthchecks. func RunHealthchecks() { DefaultRegistry.RunHealthchecks() } diff --git a/metrics/registry_test.go b/metrics/registry_test.go index 416c82d0b207..bdc58fee6c74 100644 --- a/metrics/registry_test.go +++ b/metrics/registry_test.go @@ -1,6 +1,7 @@ package metrics import ( + "sync" "testing" ) @@ -13,6 +14,30 @@ func BenchmarkRegistry(b *testing.B) { } } +func BenchmarkRegistryGetOrRegisterParallel_8(b *testing.B) { + benchmarkRegistryGetOrRegisterParallel(b, 8) +} + +func BenchmarkRegistryGetOrRegisterParallel_32(b *testing.B) { + benchmarkRegistryGetOrRegisterParallel(b, 32) +} + +func benchmarkRegistryGetOrRegisterParallel(b *testing.B, amount int) { + r := NewRegistry() + b.ResetTimer() + var wg sync.WaitGroup + for i := 0; i < amount; i++ { + wg.Add(1) + go func() { + for i := 0; i < b.N; i++ { + r.GetOrRegister("foo", NewMeter) + } + wg.Done() + }() + } + wg.Wait() +} + func TestRegistry(t *testing.T) { r := NewRegistry() r.Register("foo", NewCounter()) @@ -22,7 +47,7 @@ func TestRegistry(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -48,7 +73,7 @@ func TestRegistryDuplicate(t *testing.T) { i := 0 r.Each(func(name string, iface interface{}) { i++ - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -60,11 +85,11 @@ func TestRegistryDuplicate(t *testing.T) { func TestRegistryGet(t *testing.T) { r := NewRegistry() r.Register("foo", NewCounter()) - if count := r.Get("foo").(Counter).Count(); count != 0 { + if count := r.Get("foo").(*Counter).Snapshot().Count(); count != 0 { t.Fatal(count) } - r.Get("foo").(Counter).Inc(1) - if count := r.Get("foo").(Counter).Count(); count != 1 { + r.Get("foo").(*Counter).Inc(1) + if count := r.Get("foo").(*Counter).Snapshot().Count(); count != 1 { t.Fatal(count) } } @@ -75,7 +100,7 @@ func TestRegistryGetOrRegister(t *testing.T) { // First metric wins with GetOrRegister _ = r.GetOrRegister("foo", NewCounter()) m := r.GetOrRegister("foo", NewGauge()) - if _, ok := m.(Counter); !ok { + if _, ok := m.(*Counter); !ok { t.Fatal(m) } @@ -85,7 +110,7 @@ func TestRegistryGetOrRegister(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -100,7 +125,7 @@ func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { // First metric wins with GetOrRegister _ = r.GetOrRegister("foo", NewCounter) m := r.GetOrRegister("foo", NewGauge) - if _, ok := m.(Counter); !ok { + if _, ok := m.(*Counter); !ok { t.Fatal(m) } @@ -110,7 +135,7 @@ func TestRegistryGetOrRegisterWithLazyInstantiation(t *testing.T) { if name != "foo" { t.Fatal(name) } - if _, ok := iface.(Counter); !ok { + if _, ok := iface.(*Counter); !ok { t.Fatal(iface) } }) @@ -270,7 +295,10 @@ func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { if err != nil { t.Fatal(err.Error()) } - r2.Register("baz", NewCounter()) + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } c := NewCounter() Register("bars", c) @@ -278,7 +306,7 @@ func TestChildPrefixedRegistryOfChildRegister(t *testing.T) { r2.Each(func(name string, m interface{}) { i++ if name != "prefix.prefix2.baz" { - //t.Fatal(name) + t.Fatal(name) } }) if i != 1 { @@ -293,7 +321,10 @@ func TestWalkRegistries(t *testing.T) { if err != nil { t.Fatal(err.Error()) } - r2.Register("baz", NewCounter()) + err = r2.Register("baz", NewCounter()) + if err != nil { + t.Fatal(err.Error()) + } c := NewCounter() Register("bars", c) @@ -301,5 +332,4 @@ func TestWalkRegistries(t *testing.T) { if prefix != "prefix.prefix2." { t.Fatal(prefix) } - } diff --git a/metrics/resetting_sample.go b/metrics/resetting_sample.go new file mode 100644 index 000000000000..730ef93416d5 --- /dev/null +++ b/metrics/resetting_sample.go @@ -0,0 +1,24 @@ +package metrics + +// ResettingSample converts an ordinary sample into one that resets whenever its +// snapshot is retrieved. This will break for multi-monitor systems, but when only +// a single metric is being pushed out, this ensure that low-frequency events don't +// skew th charts indefinitely. +func ResettingSample(sample Sample) Sample { + return &resettingSample{ + Sample: sample, + } +} + +// resettingSample is a simple wrapper around a sample that resets it upon the +// snapshot retrieval. +type resettingSample struct { + Sample +} + +// Snapshot returns a read-only copy of the sample with the original reset. +func (rs *resettingSample) Snapshot() *sampleSnapshot { + s := rs.Sample.Snapshot() + rs.Sample.Clear() + return s +} diff --git a/metrics/resetting_timer.go b/metrics/resetting_timer.go index 57bcb31343fd..1b3e87bc3d12 100644 --- a/metrics/resetting_timer.go +++ b/metrics/resetting_timer.go @@ -1,37 +1,21 @@ package metrics import ( - "math" - "sort" "sync" "time" ) -// Initial slice capacity for the values stored in a ResettingTimer -const InitialResettingTimerSliceCap = 10 - -// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. -type ResettingTimer interface { - Values() []int64 - Snapshot() ResettingTimer - Percentiles([]float64) []int64 - Mean() float64 - Time(func()) - Update(time.Duration) - UpdateSince(time.Time) -} - // GetOrRegisterResettingTimer returns an existing ResettingTimer or constructs and registers a -// new StandardResettingTimer. -func GetOrRegisterResettingTimer(name string, r Registry) ResettingTimer { +// new ResettingTimer. +func GetOrRegisterResettingTimer(name string, r Registry) *ResettingTimer { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewResettingTimer).(ResettingTimer) + return r.GetOrRegister(name, NewResettingTimer).(*ResettingTimer) } -// NewRegisteredResettingTimer constructs and registers a new StandardResettingTimer. -func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { +// NewRegisteredResettingTimer constructs and registers a new ResettingTimer. +func NewRegisteredResettingTimer(name string, r Registry) *ResettingTimer { c := NewResettingTimer() if nil == r { r = DefaultRegistry @@ -40,198 +24,114 @@ func NewRegisteredResettingTimer(name string, r Registry) ResettingTimer { return c } -// NewResettingTimer constructs a new StandardResettingTimer -func NewResettingTimer() ResettingTimer { - if !Enabled { - return NilResettingTimer{} - } - return &StandardResettingTimer{ - values: make([]int64, 0, InitialResettingTimerSliceCap), +// NewResettingTimer constructs a new ResettingTimer +func NewResettingTimer() *ResettingTimer { + return &ResettingTimer{ + values: make([]int64, 0, 10), } } -// NilResettingTimer is a no-op ResettingTimer. -type NilResettingTimer struct { -} - -// Values is a no-op. -func (NilResettingTimer) Values() []int64 { return nil } - -// Snapshot is a no-op. -func (NilResettingTimer) Snapshot() ResettingTimer { return NilResettingTimer{} } - -// Time is a no-op. -func (NilResettingTimer) Time(func()) {} - -// Update is a no-op. -func (NilResettingTimer) Update(time.Duration) {} - -// Percentiles panics. -func (NilResettingTimer) Percentiles([]float64) []int64 { - panic("Percentiles called on a NilResettingTimer") -} - -// Mean panics. -func (NilResettingTimer) Mean() float64 { - panic("Mean called on a NilResettingTimer") -} - -// UpdateSince is a no-op. -func (NilResettingTimer) UpdateSince(time.Time) {} - -// StandardResettingTimer is the standard implementation of a ResettingTimer. -// and Meter. -type StandardResettingTimer struct { +// ResettingTimer is used for storing aggregated values for timers, which are reset on every flush interval. +type ResettingTimer struct { values []int64 - mutex sync.Mutex -} + sum int64 // sum is a running count of the total sum, used later to calculate mean -// Values returns a slice with all measurements. -func (t *StandardResettingTimer) Values() []int64 { - return t.values + mutex sync.Mutex } // Snapshot resets the timer and returns a read-only copy of its contents. -func (t *StandardResettingTimer) Snapshot() ResettingTimer { +func (t *ResettingTimer) Snapshot() *ResettingTimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() - currentValues := t.values - t.values = make([]int64, 0, InitialResettingTimerSliceCap) - - return &ResettingTimerSnapshot{ - values: currentValues, + snapshot := &ResettingTimerSnapshot{} + if len(t.values) > 0 { + snapshot.mean = float64(t.sum) / float64(len(t.values)) + snapshot.values = t.values + t.values = make([]int64, 0, 10) } -} - -// Percentiles panics. -func (t *StandardResettingTimer) Percentiles([]float64) []int64 { - panic("Percentiles called on a StandardResettingTimer") -} - -// Mean panics. -func (t *StandardResettingTimer) Mean() float64 { - panic("Mean called on a StandardResettingTimer") + t.sum = 0 + return snapshot } // Record the duration of the execution of the given function. -func (t *StandardResettingTimer) Time(f func()) { +func (t *ResettingTimer) Time(f func()) { ts := time.Now() f() t.Update(time.Since(ts)) } // Record the duration of an event. -func (t *StandardResettingTimer) Update(d time.Duration) { +func (t *ResettingTimer) Update(d time.Duration) { + if !metricsEnabled { + return + } t.mutex.Lock() defer t.mutex.Unlock() t.values = append(t.values, int64(d)) + t.sum += int64(d) } // Record the duration of an event that started at a time and ends now. -func (t *StandardResettingTimer) UpdateSince(ts time.Time) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.values = append(t.values, int64(time.Since(ts))) +func (t *ResettingTimer) UpdateSince(ts time.Time) { + t.Update(time.Since(ts)) } // ResettingTimerSnapshot is a point-in-time copy of another ResettingTimer. type ResettingTimerSnapshot struct { values []int64 mean float64 - thresholdBoundaries []int64 + max int64 + min int64 + thresholdBoundaries []float64 calculated bool } -// Snapshot returns the snapshot. -func (t *ResettingTimerSnapshot) Snapshot() ResettingTimer { return t } - -// Time panics. -func (*ResettingTimerSnapshot) Time(func()) { - panic("Time called on a ResettingTimerSnapshot") -} - -// Update panics. -func (*ResettingTimerSnapshot) Update(time.Duration) { - panic("Update called on a ResettingTimerSnapshot") -} - -// UpdateSince panics. -func (*ResettingTimerSnapshot) UpdateSince(time.Time) { - panic("UpdateSince called on a ResettingTimerSnapshot") -} - -// Values returns all values from snapshot. -func (t *ResettingTimerSnapshot) Values() []int64 { - return t.values +// Count return the length of the values from snapshot. +func (t *ResettingTimerSnapshot) Count() int { + return len(t.values) } // Percentiles returns the boundaries for the input percentiles. -func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []int64 { +// note: this method is not thread safe +func (t *ResettingTimerSnapshot) Percentiles(percentiles []float64) []float64 { t.calc(percentiles) - return t.thresholdBoundaries } // Mean returns the mean of the snapshotted values +// note: this method is not thread safe func (t *ResettingTimerSnapshot) Mean() float64 { if !t.calculated { - t.calc([]float64{}) + t.calc(nil) } return t.mean } -func (t *ResettingTimerSnapshot) calc(percentiles []float64) { - sort.Sort(Int64Slice(t.values)) - - count := len(t.values) - if count > 0 { - min := t.values[0] - max := t.values[count-1] - - cumulativeValues := make([]int64, count) - cumulativeValues[0] = min - for i := 1; i < count; i++ { - cumulativeValues[i] = t.values[i] + cumulativeValues[i-1] - } - - t.thresholdBoundaries = make([]int64, len(percentiles)) - - thresholdBoundary := max - - for i, pct := range percentiles { - if count > 1 { - var abs float64 - if pct >= 0 { - abs = pct - } else { - abs = 100 + pct - } - // poor man's math.Round(x): - // math.Floor(x + 0.5) - indexOfPerc := int(math.Floor(((abs / 100.0) * float64(count)) + 0.5)) - if pct >= 0 { - indexOfPerc -= 1 // index offset=0 - } - thresholdBoundary = t.values[indexOfPerc] - } - - t.thresholdBoundaries[i] = thresholdBoundary - } - - sum := cumulativeValues[count-1] - t.mean = float64(sum) / float64(count) - } else { - t.thresholdBoundaries = make([]int64, len(percentiles)) - t.mean = 0 +// Max returns the max of the snapshotted values +// note: this method is not thread safe +func (t *ResettingTimerSnapshot) Max() int64 { + if !t.calculated { + t.calc(nil) } - - t.calculated = true + return t.max } -// Int64Slice attaches the methods of sort.Interface to []int64, sorting in increasing order. -type Int64Slice []int64 +// Min returns the min of the snapshotted values +// note: this method is not thread safe +func (t *ResettingTimerSnapshot) Min() int64 { + if !t.calculated { + t.calc(nil) + } + return t.min +} -func (s Int64Slice) Len() int { return len(s) } -func (s Int64Slice) Less(i, j int) bool { return s[i] < s[j] } -func (s Int64Slice) Swap(i, j int) { s[i], s[j] = s[j], s[i] } +func (t *ResettingTimerSnapshot) calc(percentiles []float64) { + scores := CalculatePercentiles(t.values, percentiles) + t.thresholdBoundaries = scores + if len(t.values) == 0 { + return + } + t.min = t.values[0] + t.max = t.values[len(t.values)-1] +} diff --git a/metrics/resetting_timer_test.go b/metrics/resetting_timer_test.go index 58fd47f35245..4571fc8eb052 100644 --- a/metrics/resetting_timer_test.go +++ b/metrics/resetting_timer_test.go @@ -10,9 +10,9 @@ func TestResettingTimer(t *testing.T) { values []int64 start int end int - wantP50 int64 - wantP95 int64 - wantP99 int64 + wantP50 float64 + wantP95 float64 + wantP99 float64 wantMean float64 wantMin int64 wantMax int64 @@ -21,14 +21,14 @@ func TestResettingTimer(t *testing.T) { values: []int64{}, start: 1, end: 11, - wantP50: 5, wantP95: 10, wantP99: 10, + wantP50: 5.5, wantP95: 10, wantP99: 10, wantMin: 1, wantMax: 10, wantMean: 5.5, }, { values: []int64{}, start: 1, end: 101, - wantP50: 50, wantP95: 95, wantP99: 99, + wantP50: 50.5, wantP95: 95.94999999999999, wantP99: 99.99, wantMin: 1, wantMax: 100, wantMean: 50.5, }, { @@ -56,11 +56,11 @@ func TestResettingTimer(t *testing.T) { values: []int64{1, 10}, start: 0, end: 0, - wantP50: 1, wantP95: 10, wantP99: 10, + wantP50: 5.5, wantP95: 10, wantP99: 10, wantMin: 1, wantMax: 10, wantMean: 5.5, }, } - for ind, tt := range tests { + for i, tt := range tests { timer := NewResettingTimer() for i := tt.start; i < tt.end; i++ { @@ -70,37 +70,128 @@ func TestResettingTimer(t *testing.T) { for _, v := range tt.values { timer.Update(time.Duration(v)) } - snap := timer.Snapshot() - ps := snap.Percentiles([]float64{50, 95, 99}) + ps := snap.Percentiles([]float64{0.50, 0.95, 0.99}) - val := snap.Values() + if have, want := snap.Min(), tt.wantMin; have != want { + t.Fatalf("%d: min: have %d, want %d", i, have, want) + } + if have, want := snap.Max(), tt.wantMax; have != want { + t.Fatalf("%d: max: have %d, want %d", i, have, want) + } + if have, want := snap.Mean(), tt.wantMean; have != want { + t.Fatalf("%d: mean: have %v, want %v", i, have, want) + } + if have, want := ps[0], tt.wantP50; have != want { + t.Errorf("%d: p50: have %v, want %v", i, have, want) + } + if have, want := ps[1], tt.wantP95; have != want { + t.Errorf("%d: p95: have %v, want %v", i, have, want) + } + if have, want := ps[2], tt.wantP99; have != want { + t.Errorf("%d: p99: have %v, want %v", i, have, want) + } + } +} - if len(val) > 0 { - if tt.wantMin != val[0] { - t.Fatalf("%d: min: got %d, want %d", ind, val[0], tt.wantMin) - } +func TestResettingTimerWithFivePercentiles(t *testing.T) { + tests := []struct { + values []int64 + start int + end int + wantP05 float64 + wantP20 float64 + wantP50 float64 + wantP95 float64 + wantP99 float64 + wantMean float64 + wantMin int64 + wantMax int64 + }{ + { + values: []int64{}, + start: 1, + end: 11, + wantP05: 1, wantP20: 2.2, wantP50: 5.5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + { + values: []int64{}, + start: 1, + end: 101, + wantP05: 5.050000000000001, wantP20: 20.200000000000003, wantP50: 50.5, wantP95: 95.94999999999999, wantP99: 99.99, + wantMin: 1, wantMax: 100, wantMean: 50.5, + }, + { + values: []int64{1}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 1, wantP95: 1, wantP99: 1, + wantMin: 1, wantMax: 1, wantMean: 1, + }, + { + values: []int64{0}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{}, + start: 0, + end: 0, + wantP05: 0, wantP20: 0, wantP50: 0, wantP95: 0, wantP99: 0, + wantMin: 0, wantMax: 0, wantMean: 0, + }, + { + values: []int64{1, 10}, + start: 0, + end: 0, + wantP05: 1, wantP20: 1, wantP50: 5.5, wantP95: 10, wantP99: 10, + wantMin: 1, wantMax: 10, wantMean: 5.5, + }, + } + for ind, tt := range tests { + timer := NewResettingTimer() - if tt.wantMax != val[len(val)-1] { - t.Fatalf("%d: max: got %d, want %d", ind, val[len(val)-1], tt.wantMax) - } + for i := tt.start; i < tt.end; i++ { + tt.values = append(tt.values, int64(i)) } - if tt.wantMean != snap.Mean() { - t.Fatalf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + for _, v := range tt.values { + timer.Update(time.Duration(v)) } - if tt.wantP50 != ps[0] { - t.Fatalf("%d: p50: got %d, want %d", ind, ps[0], tt.wantP50) + snap := timer.Snapshot() + + ps := snap.Percentiles([]float64{0.05, 0.20, 0.50, 0.95, 0.99}) + + if tt.wantMin != snap.Min() { + t.Errorf("%d: min: got %d, want %d", ind, snap.Min(), tt.wantMin) } - if tt.wantP95 != ps[1] { - t.Fatalf("%d: p95: got %d, want %d", ind, ps[1], tt.wantP95) + if tt.wantMax != snap.Max() { + t.Errorf("%d: max: got %d, want %d", ind, snap.Max(), tt.wantMax) } - if tt.wantP99 != ps[2] { - t.Fatalf("%d: p99: got %d, want %d", ind, ps[2], tt.wantP99) + if tt.wantMean != snap.Mean() { + t.Errorf("%d: mean: got %.2f, want %.2f", ind, snap.Mean(), tt.wantMean) + } + if tt.wantP05 != ps[0] { + t.Errorf("%d: p05: got %v, want %v", ind, ps[0], tt.wantP05) + } + if tt.wantP20 != ps[1] { + t.Errorf("%d: p20: got %v, want %v", ind, ps[1], tt.wantP20) + } + if tt.wantP50 != ps[2] { + t.Errorf("%d: p50: got %v, want %v", ind, ps[2], tt.wantP50) + } + if tt.wantP95 != ps[3] { + t.Errorf("%d: p95: got %v, want %v", ind, ps[3], tt.wantP95) + } + if tt.wantP99 != ps[4] { + t.Errorf("%d: p99: got %v, want %v", ind, ps[4], tt.wantP99) } } } diff --git a/metrics/runtime.go b/metrics/runtime.go deleted file mode 100644 index 9450c479bad7..000000000000 --- a/metrics/runtime.go +++ /dev/null @@ -1,212 +0,0 @@ -package metrics - -import ( - "runtime" - "runtime/pprof" - "time" -) - -var ( - memStats runtime.MemStats - runtimeMetrics struct { - MemStats struct { - Alloc Gauge - BuckHashSys Gauge - DebugGC Gauge - EnableGC Gauge - Frees Gauge - HeapAlloc Gauge - HeapIdle Gauge - HeapInuse Gauge - HeapObjects Gauge - HeapReleased Gauge - HeapSys Gauge - LastGC Gauge - Lookups Gauge - Mallocs Gauge - MCacheInuse Gauge - MCacheSys Gauge - MSpanInuse Gauge - MSpanSys Gauge - NextGC Gauge - NumGC Gauge - GCCPUFraction GaugeFloat64 - PauseNs Histogram - PauseTotalNs Gauge - StackInuse Gauge - StackSys Gauge - Sys Gauge - TotalAlloc Gauge - } - NumCgoCall Gauge - NumGoroutine Gauge - NumThread Gauge - ReadMemStats Timer - } - frees uint64 - lookups uint64 - mallocs uint64 - numGC uint32 - numCgoCalls int64 - - threadCreateProfile = pprof.Lookup("threadcreate") -) - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called as a goroutine. -func CaptureRuntimeMemStats(r Registry, d time.Duration) { - for range time.Tick(d) { - CaptureRuntimeMemStatsOnce(r) - } -} - -// Capture new values for the Go runtime statistics exported in -// runtime.MemStats. This is designed to be called in a background -// goroutine. Giving a registry which has not been given to -// RegisterRuntimeMemStats will panic. -// -// Be very careful with this because runtime.ReadMemStats calls the C -// functions runtime·semacquire(&runtime·worldsema) and runtime·stoptheworld() -// and that last one does what it says on the tin. -func CaptureRuntimeMemStatsOnce(r Registry) { - t := time.Now() - runtime.ReadMemStats(&memStats) // This takes 50-200us. - runtimeMetrics.ReadMemStats.UpdateSince(t) - - runtimeMetrics.MemStats.Alloc.Update(int64(memStats.Alloc)) - runtimeMetrics.MemStats.BuckHashSys.Update(int64(memStats.BuckHashSys)) - if memStats.DebugGC { - runtimeMetrics.MemStats.DebugGC.Update(1) - } else { - runtimeMetrics.MemStats.DebugGC.Update(0) - } - if memStats.EnableGC { - runtimeMetrics.MemStats.EnableGC.Update(1) - } else { - runtimeMetrics.MemStats.EnableGC.Update(0) - } - - runtimeMetrics.MemStats.Frees.Update(int64(memStats.Frees - frees)) - runtimeMetrics.MemStats.HeapAlloc.Update(int64(memStats.HeapAlloc)) - runtimeMetrics.MemStats.HeapIdle.Update(int64(memStats.HeapIdle)) - runtimeMetrics.MemStats.HeapInuse.Update(int64(memStats.HeapInuse)) - runtimeMetrics.MemStats.HeapObjects.Update(int64(memStats.HeapObjects)) - runtimeMetrics.MemStats.HeapReleased.Update(int64(memStats.HeapReleased)) - runtimeMetrics.MemStats.HeapSys.Update(int64(memStats.HeapSys)) - runtimeMetrics.MemStats.LastGC.Update(int64(memStats.LastGC)) - runtimeMetrics.MemStats.Lookups.Update(int64(memStats.Lookups - lookups)) - runtimeMetrics.MemStats.Mallocs.Update(int64(memStats.Mallocs - mallocs)) - runtimeMetrics.MemStats.MCacheInuse.Update(int64(memStats.MCacheInuse)) - runtimeMetrics.MemStats.MCacheSys.Update(int64(memStats.MCacheSys)) - runtimeMetrics.MemStats.MSpanInuse.Update(int64(memStats.MSpanInuse)) - runtimeMetrics.MemStats.MSpanSys.Update(int64(memStats.MSpanSys)) - runtimeMetrics.MemStats.NextGC.Update(int64(memStats.NextGC)) - runtimeMetrics.MemStats.NumGC.Update(int64(memStats.NumGC - numGC)) - runtimeMetrics.MemStats.GCCPUFraction.Update(gcCPUFraction(&memStats)) - - // - i := numGC % uint32(len(memStats.PauseNs)) - ii := memStats.NumGC % uint32(len(memStats.PauseNs)) - if memStats.NumGC-numGC >= uint32(len(memStats.PauseNs)) { - for i = 0; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } else { - if i > ii { - for ; i < uint32(len(memStats.PauseNs)); i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - i = 0 - } - for ; i < ii; i++ { - runtimeMetrics.MemStats.PauseNs.Update(int64(memStats.PauseNs[i])) - } - } - frees = memStats.Frees - lookups = memStats.Lookups - mallocs = memStats.Mallocs - numGC = memStats.NumGC - - runtimeMetrics.MemStats.PauseTotalNs.Update(int64(memStats.PauseTotalNs)) - runtimeMetrics.MemStats.StackInuse.Update(int64(memStats.StackInuse)) - runtimeMetrics.MemStats.StackSys.Update(int64(memStats.StackSys)) - runtimeMetrics.MemStats.Sys.Update(int64(memStats.Sys)) - runtimeMetrics.MemStats.TotalAlloc.Update(int64(memStats.TotalAlloc)) - - currentNumCgoCalls := numCgoCall() - runtimeMetrics.NumCgoCall.Update(currentNumCgoCalls - numCgoCalls) - numCgoCalls = currentNumCgoCalls - - runtimeMetrics.NumGoroutine.Update(int64(runtime.NumGoroutine())) - - runtimeMetrics.NumThread.Update(int64(threadCreateProfile.Count())) -} - -// Register runtimeMetrics for the Go runtime statistics exported in runtime and -// specifically runtime.MemStats. The runtimeMetrics are named by their -// fully-qualified Go symbols, i.e. runtime.MemStats.Alloc. -func RegisterRuntimeMemStats(r Registry) { - runtimeMetrics.MemStats.Alloc = NewGauge() - runtimeMetrics.MemStats.BuckHashSys = NewGauge() - runtimeMetrics.MemStats.DebugGC = NewGauge() - runtimeMetrics.MemStats.EnableGC = NewGauge() - runtimeMetrics.MemStats.Frees = NewGauge() - runtimeMetrics.MemStats.HeapAlloc = NewGauge() - runtimeMetrics.MemStats.HeapIdle = NewGauge() - runtimeMetrics.MemStats.HeapInuse = NewGauge() - runtimeMetrics.MemStats.HeapObjects = NewGauge() - runtimeMetrics.MemStats.HeapReleased = NewGauge() - runtimeMetrics.MemStats.HeapSys = NewGauge() - runtimeMetrics.MemStats.LastGC = NewGauge() - runtimeMetrics.MemStats.Lookups = NewGauge() - runtimeMetrics.MemStats.Mallocs = NewGauge() - runtimeMetrics.MemStats.MCacheInuse = NewGauge() - runtimeMetrics.MemStats.MCacheSys = NewGauge() - runtimeMetrics.MemStats.MSpanInuse = NewGauge() - runtimeMetrics.MemStats.MSpanSys = NewGauge() - runtimeMetrics.MemStats.NextGC = NewGauge() - runtimeMetrics.MemStats.NumGC = NewGauge() - runtimeMetrics.MemStats.GCCPUFraction = NewGaugeFloat64() - runtimeMetrics.MemStats.PauseNs = NewHistogram(NewExpDecaySample(1028, 0.015)) - runtimeMetrics.MemStats.PauseTotalNs = NewGauge() - runtimeMetrics.MemStats.StackInuse = NewGauge() - runtimeMetrics.MemStats.StackSys = NewGauge() - runtimeMetrics.MemStats.Sys = NewGauge() - runtimeMetrics.MemStats.TotalAlloc = NewGauge() - runtimeMetrics.NumCgoCall = NewGauge() - runtimeMetrics.NumGoroutine = NewGauge() - runtimeMetrics.NumThread = NewGauge() - runtimeMetrics.ReadMemStats = NewTimer() - - r.Register("runtime.MemStats.Alloc", runtimeMetrics.MemStats.Alloc) - r.Register("runtime.MemStats.BuckHashSys", runtimeMetrics.MemStats.BuckHashSys) - r.Register("runtime.MemStats.DebugGC", runtimeMetrics.MemStats.DebugGC) - r.Register("runtime.MemStats.EnableGC", runtimeMetrics.MemStats.EnableGC) - r.Register("runtime.MemStats.Frees", runtimeMetrics.MemStats.Frees) - r.Register("runtime.MemStats.HeapAlloc", runtimeMetrics.MemStats.HeapAlloc) - r.Register("runtime.MemStats.HeapIdle", runtimeMetrics.MemStats.HeapIdle) - r.Register("runtime.MemStats.HeapInuse", runtimeMetrics.MemStats.HeapInuse) - r.Register("runtime.MemStats.HeapObjects", runtimeMetrics.MemStats.HeapObjects) - r.Register("runtime.MemStats.HeapReleased", runtimeMetrics.MemStats.HeapReleased) - r.Register("runtime.MemStats.HeapSys", runtimeMetrics.MemStats.HeapSys) - r.Register("runtime.MemStats.LastGC", runtimeMetrics.MemStats.LastGC) - r.Register("runtime.MemStats.Lookups", runtimeMetrics.MemStats.Lookups) - r.Register("runtime.MemStats.Mallocs", runtimeMetrics.MemStats.Mallocs) - r.Register("runtime.MemStats.MCacheInuse", runtimeMetrics.MemStats.MCacheInuse) - r.Register("runtime.MemStats.MCacheSys", runtimeMetrics.MemStats.MCacheSys) - r.Register("runtime.MemStats.MSpanInuse", runtimeMetrics.MemStats.MSpanInuse) - r.Register("runtime.MemStats.MSpanSys", runtimeMetrics.MemStats.MSpanSys) - r.Register("runtime.MemStats.NextGC", runtimeMetrics.MemStats.NextGC) - r.Register("runtime.MemStats.NumGC", runtimeMetrics.MemStats.NumGC) - r.Register("runtime.MemStats.GCCPUFraction", runtimeMetrics.MemStats.GCCPUFraction) - r.Register("runtime.MemStats.PauseNs", runtimeMetrics.MemStats.PauseNs) - r.Register("runtime.MemStats.PauseTotalNs", runtimeMetrics.MemStats.PauseTotalNs) - r.Register("runtime.MemStats.StackInuse", runtimeMetrics.MemStats.StackInuse) - r.Register("runtime.MemStats.StackSys", runtimeMetrics.MemStats.StackSys) - r.Register("runtime.MemStats.Sys", runtimeMetrics.MemStats.Sys) - r.Register("runtime.MemStats.TotalAlloc", runtimeMetrics.MemStats.TotalAlloc) - r.Register("runtime.NumCgoCall", runtimeMetrics.NumCgoCall) - r.Register("runtime.NumGoroutine", runtimeMetrics.NumGoroutine) - r.Register("runtime.NumThread", runtimeMetrics.NumThread) - r.Register("runtime.ReadMemStats", runtimeMetrics.ReadMemStats) -} diff --git a/metrics/runtime_cgo.go b/metrics/runtime_cgo.go deleted file mode 100644 index e3391f4e89fa..000000000000 --- a/metrics/runtime_cgo.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build cgo -// +build !appengine - -package metrics - -import "runtime" - -func numCgoCall() int64 { - return runtime.NumCgoCall() -} diff --git a/metrics/runtime_gccpufraction.go b/metrics/runtime_gccpufraction.go deleted file mode 100644 index ca12c05bac74..000000000000 --- a/metrics/runtime_gccpufraction.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return memStats.GCCPUFraction -} diff --git a/metrics/runtime_no_cgo.go b/metrics/runtime_no_cgo.go deleted file mode 100644 index 616a3b4751be..000000000000 --- a/metrics/runtime_no_cgo.go +++ /dev/null @@ -1,7 +0,0 @@ -// +build !cgo appengine - -package metrics - -func numCgoCall() int64 { - return 0 -} diff --git a/metrics/runtime_no_gccpufraction.go b/metrics/runtime_no_gccpufraction.go deleted file mode 100644 index be96aa6f1be9..000000000000 --- a/metrics/runtime_no_gccpufraction.go +++ /dev/null @@ -1,9 +0,0 @@ -// +build !go1.5 - -package metrics - -import "runtime" - -func gcCPUFraction(memStats *runtime.MemStats) float64 { - return 0 -} diff --git a/metrics/runtime_test.go b/metrics/runtime_test.go deleted file mode 100644 index f85f7868f71a..000000000000 --- a/metrics/runtime_test.go +++ /dev/null @@ -1,88 +0,0 @@ -package metrics - -import ( - "runtime" - "testing" - "time" -) - -func BenchmarkRuntimeMemStats(b *testing.B) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - b.ResetTimer() - for i := 0; i < b.N; i++ { - CaptureRuntimeMemStatsOnce(r) - } -} - -func TestRuntimeMemStats(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - zero := runtimeMetrics.MemStats.PauseNs.Count() // Get a "zero" since GC may have run before these tests. - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 1 { - t.Fatal(count - zero) - } - runtime.GC() - runtime.GC() - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 3 { - t.Fatal(count - zero) - } - for i := 0; i < 256; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 259 { - t.Fatal(count - zero) - } - for i := 0; i < 257; i++ { - runtime.GC() - } - CaptureRuntimeMemStatsOnce(r) - if count := runtimeMetrics.MemStats.PauseNs.Count(); count-zero != 515 { // We lost one because there were too many GCs between captures. - t.Fatal(count - zero) - } -} - -func TestRuntimeMemStatsNumThread(t *testing.T) { - r := NewRegistry() - RegisterRuntimeMemStats(r) - CaptureRuntimeMemStatsOnce(r) - - if value := runtimeMetrics.NumThread.Value(); value < 1 { - t.Fatalf("got NumThread: %d, wanted at least 1", value) - } -} - -func TestRuntimeMemStatsBlocking(t *testing.T) { - if g := runtime.GOMAXPROCS(0); g < 2 { - t.Skipf("skipping TestRuntimeMemStatsBlocking with GOMAXPROCS=%d\n", g) - } - ch := make(chan int) - go testRuntimeMemStatsBlocking(ch) - var memStats runtime.MemStats - t0 := time.Now() - runtime.ReadMemStats(&memStats) - t1 := time.Now() - t.Log("i++ during runtime.ReadMemStats:", <-ch) - go testRuntimeMemStatsBlocking(ch) - d := t1.Sub(t0) - t.Log(d) - time.Sleep(d) - t.Log("i++ during time.Sleep:", <-ch) -} - -func testRuntimeMemStatsBlocking(ch chan int) { - i := 0 - for { - select { - case ch <- i: - return - default: - i++ - } - } -} diff --git a/metrics/runtimehistogram.go b/metrics/runtimehistogram.go new file mode 100644 index 000000000000..92fcbcc2814c --- /dev/null +++ b/metrics/runtimehistogram.go @@ -0,0 +1,301 @@ +package metrics + +import ( + "math" + "runtime/metrics" + "sort" + "sync/atomic" +) + +func getOrRegisterRuntimeHistogram(name string, scale float64, r Registry) *runtimeHistogram { + if r == nil { + r = DefaultRegistry + } + constructor := func() Histogram { return newRuntimeHistogram(scale) } + return r.GetOrRegister(name, constructor).(*runtimeHistogram) +} + +// runtimeHistogram wraps a runtime/metrics histogram. +type runtimeHistogram struct { + v atomic.Value // v is a pointer to a metrics.Float64Histogram + scaleFactor float64 +} + +func newRuntimeHistogram(scale float64) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} + h.update(new(metrics.Float64Histogram)) + return h +} + +func RuntimeHistogramFromData(scale float64, hist *metrics.Float64Histogram) *runtimeHistogram { + h := &runtimeHistogram{scaleFactor: scale} + h.update(hist) + return h +} + +func (h *runtimeHistogram) update(mh *metrics.Float64Histogram) { + if mh == nil { + // The update value can be nil if the current Go version doesn't support a + // requested metric. It's just easier to handle nil here than putting + // conditionals everywhere. + return + } + + s := metrics.Float64Histogram{ + Counts: make([]uint64, len(mh.Counts)), + Buckets: make([]float64, len(mh.Buckets)), + } + copy(s.Counts, mh.Counts) + for i, b := range mh.Buckets { + s.Buckets[i] = b * h.scaleFactor + } + h.v.Store(&s) +} + +func (h *runtimeHistogram) Clear() { + panic("runtimeHistogram does not support Clear") +} +func (h *runtimeHistogram) Update(int64) { + panic("runtimeHistogram does not support Update") +} + +// Snapshot returns a non-changing copy of the histogram. +func (h *runtimeHistogram) Snapshot() HistogramSnapshot { + hist := h.v.Load().(*metrics.Float64Histogram) + return newRuntimeHistogramSnapshot(hist) +} + +type runtimeHistogramSnapshot struct { + internal *metrics.Float64Histogram + calculated bool + // The following fields are (lazily) calculated based on 'internal' + mean float64 + count int64 + min int64 // min is the lowest sample value. + max int64 // max is the highest sample value. + variance float64 +} + +func newRuntimeHistogramSnapshot(h *metrics.Float64Histogram) *runtimeHistogramSnapshot { + return &runtimeHistogramSnapshot{ + internal: h, + } +} + +// calc calculates the values for the snapshot. This method is not threadsafe. +func (h *runtimeHistogramSnapshot) calc() { + h.calculated = true + var ( + count int64 // number of samples + sum float64 // approx sum of all sample values + min int64 + max float64 + ) + if len(h.internal.Counts) == 0 { + return + } + for i, c := range h.internal.Counts { + if c == 0 { + continue + } + if count == 0 { // Set min only first loop iteration + min = int64(math.Floor(h.internal.Buckets[i])) + } + count += int64(c) + sum += h.midpoint(i) * float64(c) + // Set max on every iteration + edge := h.internal.Buckets[i+1] + if math.IsInf(edge, 1) { + edge = h.internal.Buckets[i] + } + if edge > max { + max = edge + } + } + h.min = min + h.max = int64(max) + h.mean = sum / float64(count) + h.count = count +} + +// Count returns the sample count. +func (h *runtimeHistogramSnapshot) Count() int64 { + if !h.calculated { + h.calc() + } + return h.count +} + +// Size returns the size of the sample at the time the snapshot was taken. +func (h *runtimeHistogramSnapshot) Size() int { + return len(h.internal.Counts) +} + +// Mean returns an approximation of the mean. +func (h *runtimeHistogramSnapshot) Mean() float64 { + if !h.calculated { + h.calc() + } + return h.mean +} + +func (h *runtimeHistogramSnapshot) midpoint(bucket int) float64 { + high := h.internal.Buckets[bucket+1] + low := h.internal.Buckets[bucket] + if math.IsInf(high, 1) { + // The edge of the highest bucket can be +Inf, and it's supposed to mean that this + // bucket contains all remaining samples > low. We can't get the middle of an + // infinite range, so just return the lower bound of this bucket instead. + return low + } + if math.IsInf(low, -1) { + // Similarly, we can get -Inf in the left edge of the lowest bucket, + // and it means the bucket contains all remaining values < high. + return high + } + return (low + high) / 2 +} + +// StdDev approximates the standard deviation of the histogram. +func (h *runtimeHistogramSnapshot) StdDev() float64 { + return math.Sqrt(h.Variance()) +} + +// Variance approximates the variance of the histogram. +func (h *runtimeHistogramSnapshot) Variance() float64 { + if len(h.internal.Counts) == 0 { + return 0 + } + if !h.calculated { + h.calc() + } + if h.count <= 1 { + // There is no variance when there are zero or one items. + return 0 + } + // Variance is not calculated in 'calc', because it requires a second iteration. + // Therefore we calculate it lazily in this method, triggered either by + // a direct call to Variance or via StdDev. + if h.variance != 0.0 { + return h.variance + } + var sum float64 + + for i, c := range h.internal.Counts { + midpoint := h.midpoint(i) + d := midpoint - h.mean + sum += float64(c) * (d * d) + } + h.variance = sum / float64(h.count-1) + return h.variance +} + +// Percentile computes the p'th percentile value. +func (h *runtimeHistogramSnapshot) Percentile(p float64) float64 { + threshold := float64(h.Count()) * p + values := [1]float64{threshold} + h.computePercentiles(values[:]) + return values[0] +} + +// Percentiles computes all requested percentile values. +func (h *runtimeHistogramSnapshot) Percentiles(ps []float64) []float64 { + // Compute threshold values. We need these to be sorted + // for the percentile computation, but restore the original + // order later, so keep the indexes as well. + count := float64(h.Count()) + thresholds := make([]float64, len(ps)) + indexes := make([]int, len(ps)) + for i, percentile := range ps { + thresholds[i] = count * math.Max(0, math.Min(1.0, percentile)) + indexes[i] = i + } + sort.Sort(floatsAscendingKeepingIndex{thresholds, indexes}) + + // Now compute. The result is stored back into the thresholds slice. + h.computePercentiles(thresholds) + + // Put the result back into the requested order. + sort.Sort(floatsByIndex{thresholds, indexes}) + return thresholds +} + +func (h *runtimeHistogramSnapshot) computePercentiles(thresh []float64) { + var totalCount float64 + for i, count := range h.internal.Counts { + totalCount += float64(count) + + for len(thresh) > 0 && thresh[0] < totalCount { + thresh[0] = h.internal.Buckets[i] + thresh = thresh[1:] + } + if len(thresh) == 0 { + return + } + } +} + +// Note: runtime/metrics.Float64Histogram is a collection of float64s, but the methods +// below need to return int64 to satisfy the interface. The histogram provided by runtime +// also doesn't keep track of individual samples, so results are approximated. + +// Max returns the highest sample value. +func (h *runtimeHistogramSnapshot) Max() int64 { + if !h.calculated { + h.calc() + } + return h.max +} + +// Min returns the lowest sample value. +func (h *runtimeHistogramSnapshot) Min() int64 { + if !h.calculated { + h.calc() + } + return h.min +} + +// Sum returns the sum of all sample values. +func (h *runtimeHistogramSnapshot) Sum() int64 { + var sum float64 + for i := range h.internal.Counts { + sum += h.internal.Buckets[i] * float64(h.internal.Counts[i]) + } + return int64(math.Ceil(sum)) +} + +type floatsAscendingKeepingIndex struct { + values []float64 + indexes []int +} + +func (s floatsAscendingKeepingIndex) Len() int { + return len(s.values) +} + +func (s floatsAscendingKeepingIndex) Less(i, j int) bool { + return s.values[i] < s.values[j] +} + +func (s floatsAscendingKeepingIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} + +type floatsByIndex struct { + values []float64 + indexes []int +} + +func (s floatsByIndex) Len() int { + return len(s.values) +} + +func (s floatsByIndex) Less(i, j int) bool { + return s.indexes[i] < s.indexes[j] +} + +func (s floatsByIndex) Swap(i, j int) { + s.values[i], s.values[j] = s.values[j], s.values[i] + s.indexes[i], s.indexes[j] = s.indexes[j], s.indexes[i] +} diff --git a/metrics/runtimehistogram_test.go b/metrics/runtimehistogram_test.go new file mode 100644 index 000000000000..cf7e36420ae9 --- /dev/null +++ b/metrics/runtimehistogram_test.go @@ -0,0 +1,162 @@ +package metrics + +import ( + "bytes" + "encoding/gob" + "fmt" + "math" + "reflect" + "runtime/metrics" + "testing" + "time" +) + +var _ Histogram = (*runtimeHistogram)(nil) + +type runtimeHistogramTest struct { + h metrics.Float64Histogram + + Count int64 + Min int64 + Max int64 + Sum int64 + Mean float64 + Variance float64 + StdDev float64 + Percentiles []float64 // .5 .8 .9 .99 .995 +} + +// This test checks the results of statistical functions implemented +// by runtimeHistogramSnapshot. +func TestRuntimeHistogramStats(t *testing.T) { + tests := []runtimeHistogramTest{ + 0: { + h: metrics.Float64Histogram{ + Counts: []uint64{}, + Buckets: []float64{}, + }, + Count: 0, + Max: 0, + Min: 0, + Sum: 0, + Mean: 0, + Variance: 0, + StdDev: 0, + Percentiles: []float64{0, 0, 0, 0, 0}, + }, + 1: { + // This checks the case where the highest bucket is +Inf. + h: metrics.Float64Histogram{ + Counts: []uint64{0, 1, 2}, + Buckets: []float64{0, 0.5, 1, math.Inf(1)}, + }, + Count: 3, + Max: 1, + Min: 0, + Sum: 3, + Mean: 0.9166666, + Percentiles: []float64{1, 1, 1, 1, 1}, + Variance: 0.020833, + StdDev: 0.144433, + }, + 2: { + h: metrics.Float64Histogram{ + Counts: []uint64{8, 6, 3, 1}, + Buckets: []float64{12, 16, 18, 24, 25}, + }, + Count: 18, + Max: 25, + Min: 12, + Sum: 270, + Mean: 16.75, + Variance: 10.3015, + StdDev: 3.2096, + Percentiles: []float64{16, 18, 18, 24, 24}, + }, + } + + for i, test := range tests { + t.Run(fmt.Sprint(i), func(t *testing.T) { + s := RuntimeHistogramFromData(1.0, &test.h).Snapshot() + + if v := s.Count(); v != test.Count { + t.Errorf("Count() = %v, want %v", v, test.Count) + } + if v := s.Min(); v != test.Min { + t.Errorf("Min() = %v, want %v", v, test.Min) + } + if v := s.Max(); v != test.Max { + t.Errorf("Max() = %v, want %v", v, test.Max) + } + if v := s.Sum(); v != test.Sum { + t.Errorf("Sum() = %v, want %v", v, test.Sum) + } + if v := s.Mean(); !approxEqual(v, test.Mean, 0.0001) { + t.Errorf("Mean() = %v, want %v", v, test.Mean) + } + if v := s.Variance(); !approxEqual(v, test.Variance, 0.0001) { + t.Errorf("Variance() = %v, want %v", v, test.Variance) + } + if v := s.StdDev(); !approxEqual(v, test.StdDev, 0.0001) { + t.Errorf("StdDev() = %v, want %v", v, test.StdDev) + } + ps := []float64{.5, .8, .9, .99, .995} + if v := s.Percentiles(ps); !reflect.DeepEqual(v, test.Percentiles) { + t.Errorf("Percentiles(%v) = %v, want %v", ps, v, test.Percentiles) + } + }) + } +} + +func approxEqual(x, y, ε float64) bool { + if math.IsInf(x, -1) && math.IsInf(y, -1) { + return true + } + if math.IsInf(x, 1) && math.IsInf(y, 1) { + return true + } + if math.IsNaN(x) && math.IsNaN(y) { + return true + } + return math.Abs(x-y) < ε +} + +// This test verifies that requesting Percentiles in unsorted order +// returns them in the requested order. +func TestRuntimeHistogramStatsPercentileOrder(t *testing.T) { + s := RuntimeHistogramFromData(1.0, &metrics.Float64Histogram{ + Counts: []uint64{1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + Buckets: []float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + }).Snapshot() + result := s.Percentiles([]float64{1, 0.2, 0.5, 0.1, 0.2}) + expected := []float64{10, 2, 5, 1, 2} + if !reflect.DeepEqual(result, expected) { + t.Fatal("wrong result:", result) + } +} + +func BenchmarkRuntimeHistogramSnapshotRead(b *testing.B) { + var sLatency = "7\xff\x81\x03\x01\x01\x10Float64Histogram\x01\xff\x82\x00\x01\x02\x01\x06Counts\x01\xff\x84\x00\x01\aBuckets\x01\xff\x86\x00\x00\x00\x16\xff\x83\x02\x01\x01\b[]uint64\x01\xff\x84\x00\x01\x06\x00\x00\x17\xff\x85\x02\x01\x01\t[]float64\x01\xff\x86\x00\x01\b\x00\x00\xfe\x06T\xff\x82\x01\xff\xa2\x00\xfe\r\xef\x00\x01\x02\x02\x04\x05\x04\b\x15\x17 B?6.L;$!2) \x1a? \x190aH7FY6#\x190\x1d\x14\x10\x1b\r\t\x04\x03\x01\x01\x00\x03\x02\x00\x03\x05\x05\x02\x02\x06\x04\v\x06\n\x15\x18\x13'&.\x12=H/L&\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\xff\xa3\xfe\xf0\xff\x00\xf8\x95\xd6&\xe8\v.q>\xf8\x95\xd6&\xe8\v.\x81>\xf8\xdfA:\xdc\x11ʼn>\xf8\x95\xd6&\xe8\v.\x91>\xf8:\x8c0\xe2\x8ey\x95>\xf8\xdfA:\xdc\x11ř>\xf8\x84\xf7C֔\x10\x9e>\xf8\x95\xd6&\xe8\v.\xa1>\xf8:\x8c0\xe2\x8ey\xa5>\xf8\xdfA:\xdc\x11ũ>\xf8\x84\xf7C֔\x10\xae>\xf8\x95\xd6&\xe8\v.\xb1>\xf8:\x8c0\xe2\x8ey\xb5>\xf8\xdfA:\xdc\x11Ź>\xf8\x84\xf7C֔\x10\xbe>\xf8\x95\xd6&\xe8\v.\xc1>\xf8:\x8c0\xe2\x8ey\xc5>\xf8\xdfA:\xdc\x11\xc5\xc9>\xf8\x84\xf7C֔\x10\xce>\xf8\x95\xd6&\xe8\v.\xd1>\xf8:\x8c0\xe2\x8ey\xd5>\xf8\xdfA:\xdc\x11\xc5\xd9>\xf8\x84\xf7C֔\x10\xde>\xf8\x95\xd6&\xe8\v.\xe1>\xf8:\x8c0\xe2\x8ey\xe5>\xf8\xdfA:\xdc\x11\xc5\xe9>\xf8\x84\xf7C֔\x10\xee>\xf8\x95\xd6&\xe8\v.\xf1>\xf8:\x8c0\xe2\x8ey\xf5>\xf8\xdfA:\xdc\x11\xc5\xf9>\xf8\x84\xf7C֔\x10\xfe>\xf8\x95\xd6&\xe8\v.\x01?\xf8:\x8c0\xe2\x8ey\x05?\xf8\xdfA:\xdc\x11\xc5\t?\xf8\x84\xf7C֔\x10\x0e?\xf8\x95\xd6&\xe8\v.\x11?\xf8:\x8c0\xe2\x8ey\x15?\xf8\xdfA:\xdc\x11\xc5\x19?\xf8\x84\xf7C֔\x10\x1e?\xf8\x95\xd6&\xe8\v.!?\xf8:\x8c0\xe2\x8ey%?\xf8\xdfA:\xdc\x11\xc5)?\xf8\x84\xf7C֔\x10.?\xf8\x95\xd6&\xe8\v.1?\xf8:\x8c0\xe2\x8ey5?\xf8\xdfA:\xdc\x11\xc59?\xf8\x84\xf7C֔\x10>?\xf8\x95\xd6&\xe8\v.A?\xf8:\x8c0\xe2\x8eyE?\xf8\xdfA:\xdc\x11\xc5I?\xf8\x84\xf7C֔\x10N?\xf8\x95\xd6&\xe8\v.Q?\xf8:\x8c0\xe2\x8eyU?\xf8\xdfA:\xdc\x11\xc5Y?\xf8\x84\xf7C֔\x10^?\xf8\x95\xd6&\xe8\v.a?\xf8:\x8c0\xe2\x8eye?\xf8\xdfA:\xdc\x11\xc5i?\xf8\x84\xf7C֔\x10n?\xf8\x95\xd6&\xe8\v.q?\xf8:\x8c0\xe2\x8eyu?\xf8\xdfA:\xdc\x11\xc5y?\xf8\x84\xf7C֔\x10~?\xf8\x95\xd6&\xe8\v.\x81?\xf8:\x8c0\xe2\x8ey\x85?\xf8\xdfA:\xdc\x11ʼn?\xf8\x84\xf7C֔\x10\x8e?\xf8\x95\xd6&\xe8\v.\x91?\xf8:\x8c0\xe2\x8ey\x95?\xf8\xdfA:\xdc\x11ř?\xf8\x84\xf7C֔\x10\x9e?\xf8\x95\xd6&\xe8\v.\xa1?\xf8:\x8c0\xe2\x8ey\xa5?\xf8\xdfA:\xdc\x11ũ?\xf8\x84\xf7C֔\x10\xae?\xf8\x95\xd6&\xe8\v.\xb1?\xf8:\x8c0\xe2\x8ey\xb5?\xf8\xdfA:\xdc\x11Ź?\xf8\x84\xf7C֔\x10\xbe?\xf8\x95\xd6&\xe8\v.\xc1?\xf8:\x8c0\xe2\x8ey\xc5?\xf8\xdfA:\xdc\x11\xc5\xc9?\xf8\x84\xf7C֔\x10\xce?\xf8\x95\xd6&\xe8\v.\xd1?\xf8:\x8c0\xe2\x8ey\xd5?\xf8\xdfA:\xdc\x11\xc5\xd9?\xf8\x84\xf7C֔\x10\xde?\xf8\x95\xd6&\xe8\v.\xe1?\xf8:\x8c0\xe2\x8ey\xe5?\xf8\xdfA:\xdc\x11\xc5\xe9?\xf8\x84\xf7C֔\x10\xee?\xf8\x95\xd6&\xe8\v.\xf1?\xf8:\x8c0\xe2\x8ey\xf5?\xf8\xdfA:\xdc\x11\xc5\xf9?\xf8\x84\xf7C֔\x10\xfe?\xf8\x95\xd6&\xe8\v.\x01@\xf8:\x8c0\xe2\x8ey\x05@\xf8\xdfA:\xdc\x11\xc5\t@\xf8\x84\xf7C֔\x10\x0e@\xf8\x95\xd6&\xe8\v.\x11@\xf8:\x8c0\xe2\x8ey\x15@\xf8\xdfA:\xdc\x11\xc5\x19@\xf8\x84\xf7C֔\x10\x1e@\xf8\x95\xd6&\xe8\v.!@\xf8:\x8c0\xe2\x8ey%@\xf8\xdfA:\xdc\x11\xc5)@\xf8\x84\xf7C֔\x10.@\xf8\x95\xd6&\xe8\v.1@\xf8:\x8c0\xe2\x8ey5@\xf8\xdfA:\xdc\x11\xc59@\xf8\x84\xf7C֔\x10>@\xf8\x95\xd6&\xe8\v.A@\xf8:\x8c0\xe2\x8eyE@\xf8\xdfA:\xdc\x11\xc5I@\xf8\x84\xf7C֔\x10N@\xf8\x95\xd6&\xe8\v.Q@\xf8:\x8c0\xe2\x8eyU@\xf8\xdfA:\xdc\x11\xc5Y@\xf8\x84\xf7C֔\x10^@\xf8\x95\xd6&\xe8\v.a@\xf8:\x8c0\xe2\x8eye@\xf8\xdfA:\xdc\x11\xc5i@\xf8\x84\xf7C֔\x10n@\xf8\x95\xd6&\xe8\v.q@\xf8:\x8c0\xe2\x8eyu@\xf8\xdfA:\xdc\x11\xc5y@\xf8\x84\xf7C֔\x10~@\xf8\x95\xd6&\xe8\v.\x81@\xf8:\x8c0\xe2\x8ey\x85@\xf8\xdfA:\xdc\x11ʼn@\xf8\x84\xf7C֔\x10\x8e@\xf8\x95\xd6&\xe8\v.\x91@\xf8:\x8c0\xe2\x8ey\x95@\xf8\xdfA:\xdc\x11ř@\xf8\x84\xf7C֔\x10\x9e@\xf8\x95\xd6&\xe8\v.\xa1@\xf8:\x8c0\xe2\x8ey\xa5@\xf8\xdfA:\xdc\x11ũ@\xf8\x84\xf7C֔\x10\xae@\xf8\x95\xd6&\xe8\v.\xb1@\xf8:\x8c0\xe2\x8ey\xb5@\xf8\xdfA:\xdc\x11Ź@\xf8\x84\xf7C֔\x10\xbe@\xf8\x95\xd6&\xe8\v.\xc1@\xf8:\x8c0\xe2\x8ey\xc5@\xf8\xdfA:\xdc\x11\xc5\xc9@\xf8\x84\xf7C֔\x10\xce@\xf8\x95\xd6&\xe8\v.\xd1@\xf8:\x8c0\xe2\x8ey\xd5@\xf8\xdfA:\xdc\x11\xc5\xd9@\xf8\x84\xf7C֔\x10\xde@\xf8\x95\xd6&\xe8\v.\xe1@\xf8:\x8c0\xe2\x8ey\xe5@\xf8\xdfA:\xdc\x11\xc5\xe9@\xf8\x84\xf7C֔\x10\xee@\xf8\x95\xd6&\xe8\v.\xf1@\xf8:\x8c0\xe2\x8ey\xf5@\xf8\xdfA:\xdc\x11\xc5\xf9@\xf8\x84\xf7C֔\x10\xfe@\xf8\x95\xd6&\xe8\v.\x01A\xfe\xf0\x7f\x00" + + dserialize := func(data string) *metrics.Float64Histogram { + var res metrics.Float64Histogram + if err := gob.NewDecoder(bytes.NewReader([]byte(data))).Decode(&res); err != nil { + panic(err) + } + return &res + } + latency := RuntimeHistogramFromData(float64(time.Second), dserialize(sLatency)) + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + snap := latency.Snapshot() + // These are the fields that influxdb accesses + _ = snap.Count() + _ = snap.Max() + _ = snap.Mean() + _ = snap.Min() + _ = snap.StdDev() + _ = snap.Variance() + _ = snap.Percentiles([]float64{0.25, 0.5, 0.75, 0.95, 0.99, 0.999, 0.9999}) + } +} diff --git a/metrics/sample.go b/metrics/sample.go index fa2bfb274e39..dc8167809fdd 100644 --- a/metrics/sample.go +++ b/metrics/sample.go @@ -3,30 +3,128 @@ package metrics import ( "math" "math/rand" - "sort" + "slices" "sync" "time" ) const rescaleThreshold = time.Hour -// Samples maintain a statistically-significant selection of values from +// Sample maintains a statistically-significant selection of values from // a stream. type Sample interface { + Snapshot() *sampleSnapshot Clear() - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Size() int - Snapshot() Sample - StdDev() float64 - Sum() int64 Update(int64) - Values() []int64 - Variance() float64 +} + +var ( + _ Sample = (*ExpDecaySample)(nil) + _ Sample = (*UniformSample)(nil) + _ Sample = (*resettingSample)(nil) +) + +// sampleSnapshot is a read-only copy of a Sample. +type sampleSnapshot struct { + count int64 + values []int64 + + max int64 + min int64 + mean float64 + sum int64 + variance float64 +} + +// newSampleSnapshotPrecalculated creates a read-only sampleSnapShot, using +// precalculated sums to avoid iterating the values +func newSampleSnapshotPrecalculated(count int64, values []int64, min, max, sum int64) *sampleSnapshot { + if len(values) == 0 { + return &sampleSnapshot{ + count: count, + values: values, + } + } + return &sampleSnapshot{ + count: count, + values: values, + max: max, + min: min, + mean: float64(sum) / float64(len(values)), + sum: sum, + } +} + +// newSampleSnapshot creates a read-only sampleSnapShot, and calculates some +// numbers. +func newSampleSnapshot(count int64, values []int64) *sampleSnapshot { + var ( + max int64 = math.MinInt64 + min int64 = math.MaxInt64 + sum int64 + ) + for _, v := range values { + sum += v + if v > max { + max = v + } + if v < min { + min = v + } + } + return newSampleSnapshotPrecalculated(count, values, min, max, sum) +} + +// Count returns the count of inputs at the time the snapshot was taken. +func (s *sampleSnapshot) Count() int64 { return s.count } + +// Max returns the maximal value at the time the snapshot was taken. +func (s *sampleSnapshot) Max() int64 { return s.max } + +// Mean returns the mean value at the time the snapshot was taken. +func (s *sampleSnapshot) Mean() float64 { return s.mean } + +// Min returns the minimal value at the time the snapshot was taken. +func (s *sampleSnapshot) Min() int64 { return s.min } + +// Percentile returns an arbitrary percentile of values at the time the +// snapshot was taken. +func (s *sampleSnapshot) Percentile(p float64) float64 { + return SamplePercentile(s.values, p) +} + +// Percentiles returns a slice of arbitrary percentiles of values at the time +// the snapshot was taken. +func (s *sampleSnapshot) Percentiles(ps []float64) []float64 { + return CalculatePercentiles(s.values, ps) +} + +// Size returns the size of the sample at the time the snapshot was taken. +func (s *sampleSnapshot) Size() int { return len(s.values) } + +// StdDev returns the standard deviation of values at the time the snapshot was +// taken. +func (s *sampleSnapshot) StdDev() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) + } + return math.Sqrt(s.variance) +} + +// Sum returns the sum of values at the time the snapshot was taken. +func (s *sampleSnapshot) Sum() int64 { return s.sum } + +// Values returns a copy of the values in the sample. +func (s *sampleSnapshot) Values() []int64 { + return slices.Clone(s.values) +} + +// Variance returns the variance of values at the time the snapshot was taken. +func (s *sampleSnapshot) Variance() float64 { + if s.variance == 0.0 { + s.variance = SampleVariance(s.mean, s.values) + } + return s.variance } // ExpDecaySample is an exponentially-decaying sample using a forward-decaying @@ -41,14 +139,12 @@ type ExpDecaySample struct { reservoirSize int t0, t1 time.Time values *expDecaySampleHeap + rand *rand.Rand } // NewExpDecaySample constructs a new exponentially-decaying sample with the // given reservoir size and alpha. func NewExpDecaySample(reservoirSize int, alpha float64) Sample { - if !Enabled { - return NilSample{} - } s := &ExpDecaySample{ alpha: alpha, reservoirSize: reservoirSize, @@ -59,6 +155,12 @@ func NewExpDecaySample(reservoirSize int, alpha float64) Sample { return s } +// SetRand sets the random source (useful in tests) +func (s *ExpDecaySample) SetRand(prng *rand.Rand) Sample { + s.rand = prng + return s +} + // Clear clears all samples. func (s *ExpDecaySample) Clear() { s.mutex.Lock() @@ -69,94 +171,37 @@ func (s *ExpDecaySample) Clear() { s.values.Clear() } -// Count returns the number of samples recorded, which may exceed the -// reservoir size. -func (s *ExpDecaySample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *ExpDecaySample) Max() int64 { - return SampleMax(s.Values()) -} - -// Mean returns the mean of the values in the sample. -func (s *ExpDecaySample) Mean() float64 { - return SampleMean(s.Values()) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *ExpDecaySample) Min() int64 { - return SampleMin(s.Values()) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *ExpDecaySample) Percentile(p float64) float64 { - return SamplePercentile(s.Values(), p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *ExpDecaySample) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.Values(), ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *ExpDecaySample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.values.Size() -} - // Snapshot returns a read-only copy of the sample. -func (s *ExpDecaySample) Snapshot() Sample { +func (s *ExpDecaySample) Snapshot() *sampleSnapshot { s.mutex.Lock() defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v - } - return &SampleSnapshot{ - count: s.count, - values: values, + var ( + samples = s.values.Values() + values = make([]int64, len(samples)) + max int64 = math.MinInt64 + min int64 = math.MaxInt64 + sum int64 + ) + for i, item := range samples { + v := item.v + values[i] = v + sum += v + if v > max { + max = v + } + if v < min { + min = v + } } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *ExpDecaySample) StdDev() float64 { - return SampleStdDev(s.Values()) -} - -// Sum returns the sum of the values in the sample. -func (s *ExpDecaySample) Sum() int64 { - return SampleSum(s.Values()) + return newSampleSnapshotPrecalculated(s.count, values, min, max, sum) } // Update samples a new value. func (s *ExpDecaySample) Update(v int64) { - s.update(time.Now(), v) -} - -// Values returns a copy of the values in the sample. -func (s *ExpDecaySample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - vals := s.values.Values() - values := make([]int64, len(vals)) - for i, v := range vals { - values[i] = v.v + if !metricsEnabled { + return } - return values -} - -// Variance returns the variance of the values in the sample. -func (s *ExpDecaySample) Variance() float64 { - return SampleVariance(s.Values()) + s.update(time.Now(), v) } // update samples a new value at a particular timestamp. This is a method all @@ -168,8 +213,14 @@ func (s *ExpDecaySample) update(t time.Time, v int64) { if s.values.Size() == s.reservoirSize { s.values.Pop() } + var f64 float64 + if s.rand != nil { + f64 = s.rand.Float64() + } else { + f64 = rand.Float64() + } s.values.Push(expDecaySample{ - k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / rand.Float64(), + k: math.Exp(t.Sub(s.t0).Seconds()*s.alpha) / f64, v: v, }) if t.After(s.t1) { @@ -185,216 +236,54 @@ func (s *ExpDecaySample) update(t time.Time, v int64) { } } -// NilSample is a no-op Sample. -type NilSample struct{} - -// Clear is a no-op. -func (NilSample) Clear() {} - -// Count is a no-op. -func (NilSample) Count() int64 { return 0 } - -// Max is a no-op. -func (NilSample) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilSample) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilSample) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilSample) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilSample) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) +// SamplePercentile returns an arbitrary percentile of the slice of int64. +func SamplePercentile(values []int64, p float64) float64 { + return CalculatePercentiles(values, []float64{p})[0] } -// Size is a no-op. -func (NilSample) Size() int { return 0 } - -// Sample is a no-op. -func (NilSample) Snapshot() Sample { return NilSample{} } - -// StdDev is a no-op. -func (NilSample) StdDev() float64 { return 0.0 } - -// Sum is a no-op. -func (NilSample) Sum() int64 { return 0 } - -// Update is a no-op. -func (NilSample) Update(v int64) {} - -// Values is a no-op. -func (NilSample) Values() []int64 { return []int64{} } - -// Variance is a no-op. -func (NilSample) Variance() float64 { return 0.0 } - -// SampleMax returns the maximum value of the slice of int64. -func SampleMax(values []int64) int64 { - if len(values) == 0 { - return 0 - } - var max int64 = math.MinInt64 - for _, v := range values { - if max < v { - max = v - } - } - return max -} - -// SampleMean returns the mean value of the slice of int64. -func SampleMean(values []int64) float64 { - if len(values) == 0 { - return 0.0 - } - return float64(SampleSum(values)) / float64(len(values)) -} - -// SampleMin returns the minimum value of the slice of int64. -func SampleMin(values []int64) int64 { - if len(values) == 0 { - return 0 - } - var min int64 = math.MaxInt64 - for _, v := range values { - if min > v { - min = v - } - } - return min -} - -// SamplePercentiles returns an arbitrary percentile of the slice of int64. -func SamplePercentile(values int64Slice, p float64) float64 { - return SamplePercentiles(values, []float64{p})[0] -} - -// SamplePercentiles returns a slice of arbitrary percentiles of the slice of -// int64. -func SamplePercentiles(values int64Slice, ps []float64) []float64 { +// CalculatePercentiles returns a slice of arbitrary percentiles of the slice of +// int64. This method returns interpolated results, so e.g. if there are only two +// values, [0, 10], a 50% percentile will land between them. +// +// Note: As a side-effect, this method will also sort the slice of values. +// Note2: The input format for percentiles is NOT percent! To express 50%, use 0.5, not 50. +func CalculatePercentiles(values []int64, ps []float64) []float64 { scores := make([]float64, len(ps)) size := len(values) - if size > 0 { - sort.Sort(values) - for i, p := range ps { - pos := p * float64(size+1) - if pos < 1.0 { - scores[i] = float64(values[0]) - } else if pos >= float64(size) { - scores[i] = float64(values[size-1]) - } else { - lower := float64(values[int(pos)-1]) - upper := float64(values[int(pos)]) - scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) - } + if size == 0 { + return scores + } + slices.Sort(values) + for i, p := range ps { + pos := p * float64(size+1) + + if pos < 1.0 { + scores[i] = float64(values[0]) + } else if pos >= float64(size) { + scores[i] = float64(values[size-1]) + } else { + lower := float64(values[int(pos)-1]) + upper := float64(values[int(pos)]) + scores[i] = lower + (pos-math.Floor(pos))*(upper-lower) } } return scores } -// SampleSnapshot is a read-only copy of another Sample. -type SampleSnapshot struct { - count int64 - values []int64 -} - -func NewSampleSnapshot(count int64, values []int64) *SampleSnapshot { - return &SampleSnapshot{ - count: count, - values: values, - } -} - -// Clear panics. -func (*SampleSnapshot) Clear() { - panic("Clear called on a SampleSnapshot") -} - -// Count returns the count of inputs at the time the snapshot was taken. -func (s *SampleSnapshot) Count() int64 { return s.count } - -// Max returns the maximal value at the time the snapshot was taken. -func (s *SampleSnapshot) Max() int64 { return SampleMax(s.values) } - -// Mean returns the mean value at the time the snapshot was taken. -func (s *SampleSnapshot) Mean() float64 { return SampleMean(s.values) } - -// Min returns the minimal value at the time the snapshot was taken. -func (s *SampleSnapshot) Min() int64 { return SampleMin(s.values) } - -// Percentile returns an arbitrary percentile of values at the time the -// snapshot was taken. -func (s *SampleSnapshot) Percentile(p float64) float64 { - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values at the time -// the snapshot was taken. -func (s *SampleSnapshot) Percentiles(ps []float64) []float64 { - return SamplePercentiles(s.values, ps) -} - -// Size returns the size of the sample at the time the snapshot was taken. -func (s *SampleSnapshot) Size() int { return len(s.values) } - -// Snapshot returns the snapshot. -func (s *SampleSnapshot) Snapshot() Sample { return s } - -// StdDev returns the standard deviation of values at the time the snapshot was -// taken. -func (s *SampleSnapshot) StdDev() float64 { return SampleStdDev(s.values) } - -// Sum returns the sum of values at the time the snapshot was taken. -func (s *SampleSnapshot) Sum() int64 { return SampleSum(s.values) } - -// Update panics. -func (*SampleSnapshot) Update(int64) { - panic("Update called on a SampleSnapshot") -} - -// Values returns a copy of the values in the sample. -func (s *SampleSnapshot) Values() []int64 { - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of values at the time the snapshot was taken. -func (s *SampleSnapshot) Variance() float64 { return SampleVariance(s.values) } - -// SampleStdDev returns the standard deviation of the slice of int64. -func SampleStdDev(values []int64) float64 { - return math.Sqrt(SampleVariance(values)) -} - -// SampleSum returns the sum of the slice of int64. -func SampleSum(values []int64) int64 { - var sum int64 - for _, v := range values { - sum += v - } - return sum -} - // SampleVariance returns the variance of the slice of int64. -func SampleVariance(values []int64) float64 { +func SampleVariance(mean float64, values []int64) float64 { if len(values) == 0 { return 0.0 } - m := SampleMean(values) var sum float64 for _, v := range values { - d := float64(v) - m + d := float64(v) - mean sum += d * d } return sum / float64(len(values)) } -// A uniform sample using Vitter's Algorithm R. +// UniformSample implements a uniform sample using Vitter's Algorithm R. // // type UniformSample struct { @@ -402,136 +291,62 @@ type UniformSample struct { mutex sync.Mutex reservoirSize int values []int64 + rand *rand.Rand } // NewUniformSample constructs a new uniform sample with the given reservoir // size. func NewUniformSample(reservoirSize int) Sample { - if !Enabled { - return NilSample{} - } return &UniformSample{ reservoirSize: reservoirSize, values: make([]int64, 0, reservoirSize), } } +// SetRand sets the random source (useful in tests) +func (s *UniformSample) SetRand(prng *rand.Rand) Sample { + s.rand = prng + return s +} + // Clear clears all samples. func (s *UniformSample) Clear() { s.mutex.Lock() defer s.mutex.Unlock() s.count = 0 - s.values = make([]int64, 0, s.reservoirSize) -} - -// Count returns the number of samples recorded, which may exceed the -// reservoir size. -func (s *UniformSample) Count() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return s.count -} - -// Max returns the maximum value in the sample, which may not be the maximum -// value ever to be part of the sample. -func (s *UniformSample) Max() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMax(s.values) -} - -// Mean returns the mean of the values in the sample. -func (s *UniformSample) Mean() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMean(s.values) -} - -// Min returns the minimum value in the sample, which may not be the minimum -// value ever to be part of the sample. -func (s *UniformSample) Min() int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleMin(s.values) -} - -// Percentile returns an arbitrary percentile of values in the sample. -func (s *UniformSample) Percentile(p float64) float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentile(s.values, p) -} - -// Percentiles returns a slice of arbitrary percentiles of values in the -// sample. -func (s *UniformSample) Percentiles(ps []float64) []float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SamplePercentiles(s.values, ps) -} - -// Size returns the size of the sample, which is at most the reservoir size. -func (s *UniformSample) Size() int { - s.mutex.Lock() - defer s.mutex.Unlock() - return len(s.values) + clear(s.values) } // Snapshot returns a read-only copy of the sample. -func (s *UniformSample) Snapshot() Sample { - s.mutex.Lock() - defer s.mutex.Unlock() - values := make([]int64, len(s.values)) - copy(values, s.values) - return &SampleSnapshot{ - count: s.count, - values: values, - } -} - -// StdDev returns the standard deviation of the values in the sample. -func (s *UniformSample) StdDev() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleStdDev(s.values) -} - -// Sum returns the sum of the values in the sample. -func (s *UniformSample) Sum() int64 { +func (s *UniformSample) Snapshot() *sampleSnapshot { s.mutex.Lock() - defer s.mutex.Unlock() - return SampleSum(s.values) + values := slices.Clone(s.values) + count := s.count + s.mutex.Unlock() + return newSampleSnapshot(count, values) } // Update samples a new value. func (s *UniformSample) Update(v int64) { + if !metricsEnabled { + return + } s.mutex.Lock() defer s.mutex.Unlock() s.count++ if len(s.values) < s.reservoirSize { s.values = append(s.values, v) + return + } + var r int64 + if s.rand != nil { + r = s.rand.Int63n(s.count) } else { - r := rand.Int63n(s.count) - if r < int64(len(s.values)) { - s.values[int(r)] = v - } + r = rand.Int63n(s.count) + } + if r < int64(len(s.values)) { + s.values[int(r)] = v } -} - -// Values returns a copy of the values in the sample. -func (s *UniformSample) Values() []int64 { - s.mutex.Lock() - defer s.mutex.Unlock() - values := make([]int64, len(s.values)) - copy(values, s.values) - return values -} - -// Variance returns the variance of the values in the sample. -func (s *UniformSample) Variance() float64 { - s.mutex.Lock() - defer s.mutex.Unlock() - return SampleVariance(s.values) } // expDecaySample represents an individual sample in a heap. @@ -608,9 +423,3 @@ func (h *expDecaySampleHeap) down(i, n int) { i = j } } - -type int64Slice []int64 - -func (p int64Slice) Len() int { return len(p) } -func (p int64Slice) Less(i, j int) bool { return p[i] < p[j] } -func (p int64Slice) Swap(i, j int) { p[i], p[j] = p[j], p[i] } diff --git a/metrics/sample_test.go b/metrics/sample_test.go index d6e966400b24..6619eb1e9eb2 100644 --- a/metrics/sample_test.go +++ b/metrics/sample_test.go @@ -1,56 +1,43 @@ package metrics import ( + "math" "math/rand" - "runtime" "testing" "time" ) +const epsilonPercentile = .00000000001 + // Benchmark{Compute,Copy}{1000,1000000} demonstrate that, even for relatively // expensive computations like Variance, the cost of copying the Sample, as // approximated by a make and copy, is much greater than the cost of the // computation for small samples and only slightly less for large samples. func BenchmarkCompute1000(b *testing.B) { s := make([]int64, 1000) + var sum int64 for i := 0; i < len(s); i++ { s[i] = int64(i) + sum += int64(i) } + mean := float64(sum) / float64(len(s)) b.ResetTimer() for i := 0; i < b.N; i++ { - SampleVariance(s) + SampleVariance(mean, s) } } + func BenchmarkCompute1000000(b *testing.B) { s := make([]int64, 1000000) + var sum int64 for i := 0; i < len(s); i++ { s[i] = int64(i) + sum += int64(i) } + mean := float64(sum) / float64(len(s)) b.ResetTimer() for i := 0; i < b.N; i++ { - SampleVariance(s) - } -} -func BenchmarkCopy1000(b *testing.B) { - s := make([]int64, 1000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) - } -} -func BenchmarkCopy1000000(b *testing.B) { - s := make([]int64, 1000000) - for i := 0; i < len(s); i++ { - s[i] = int64(i) - } - b.ResetTimer() - for i := 0; i < b.N; i++ { - sCopy := make([]int64, len(s)) - copy(sCopy, s) + SampleVariance(mean, s) } } @@ -78,68 +65,35 @@ func BenchmarkUniformSample1028(b *testing.B) { benchmarkSample(b, NewUniformSample(1028)) } -func TestExpDecaySample10(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 10; i++ { - s.Update(int64(i)) - } - if size := s.Count(); size != 10 { - t.Errorf("s.Count(): 10 != %v\n", size) - } - if size := s.Size(); size != 10 { - t.Errorf("s.Size(): 10 != %v\n", size) - } - if l := len(s.Values()); l != 10 { - t.Errorf("len(s.Values()): 10 != %v\n", l) - } - for _, v := range s.Values() { - if v > 10 || v < 0 { - t.Errorf("out of range [0, 10): %v\n", v) +func TestExpDecaySample(t *testing.T) { + for _, tc := range []struct { + reservoirSize int + alpha float64 + updates int + }{ + {100, 0.99, 10}, + {1000, 0.01, 100}, + {100, 0.99, 1000}, + } { + sample := NewExpDecaySample(tc.reservoirSize, tc.alpha) + for i := 0; i < tc.updates; i++ { + sample.Update(int64(i)) } - } -} - -func TestExpDecaySample100(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(1000, 0.01) - for i := 0; i < 100; i++ { - s.Update(int64(i)) - } - if size := s.Count(); size != 100 { - t.Errorf("s.Count(): 100 != %v\n", size) - } - if size := s.Size(); size != 100 { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); l != 100 { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 100 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) + snap := sample.Snapshot() + if have, want := int(snap.Count()), tc.updates; have != want { + t.Errorf("unexpected count: have %d want %d", have, want) } - } -} - -func TestExpDecaySample1000(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 1000; i++ { - s.Update(int64(i)) - } - if size := s.Count(); size != 1000 { - t.Errorf("s.Count(): 1000 != %v\n", size) - } - if size := s.Size(); size != 100 { - t.Errorf("s.Size(): 100 != %v\n", size) - } - if l := len(s.Values()); l != 100 { - t.Errorf("len(s.Values()): 100 != %v\n", l) - } - for _, v := range s.Values() { - if v > 1000 || v < 0 { - t.Errorf("out of range [0, 1000): %v\n", v) + if have, want := snap.Size(), min(tc.updates, tc.reservoirSize); have != want { + t.Errorf("unexpected size: have %d want %d", have, want) + } + values := snap.values + if have, want := len(values), min(tc.updates, tc.reservoirSize); have != want { + t.Errorf("unexpected values length: have %d want %d", have, want) + } + for _, v := range values { + if v > int64(tc.updates) || v < 0 { + t.Errorf("out of range [0, %d]: %v", tc.updates, v) + } } } } @@ -149,16 +103,15 @@ func TestExpDecaySample1000(t *testing.T) { // The priority becomes +Inf quickly after starting if this is done, // effectively freezing the set of samples until a rescale step happens. func TestExpDecaySampleNanosecondRegression(t *testing.T) { - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) - for i := 0; i < 100; i++ { - s.Update(10) + sw := NewExpDecaySample(1000, 0.99) + for i := 0; i < 1000; i++ { + sw.Update(10) } time.Sleep(1 * time.Millisecond) - for i := 0; i < 100; i++ { - s.Update(20) + for i := 0; i < 1000; i++ { + sw.Update(20) } - v := s.Values() + v := sw.Snapshot().values avg := float64(0) for i := 0; i < len(v); i++ { avg += float64(v[i]) @@ -182,8 +135,7 @@ func TestExpDecaySampleRescale(t *testing.T) { func TestExpDecaySampleSnapshot(t *testing.T) { now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) + s := NewExpDecaySample(100, 0.99).(*ExpDecaySample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) } @@ -194,44 +146,44 @@ func TestExpDecaySampleSnapshot(t *testing.T) { func TestExpDecaySampleStatistics(t *testing.T) { now := time.Now() - rand.Seed(1) - s := NewExpDecaySample(100, 0.99) + s := NewExpDecaySample(100, 0.99).(*ExpDecaySample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.(*ExpDecaySample).update(now.Add(time.Duration(i)), int64(i)) } - testExpDecaySampleStatistics(t, s) + testExpDecaySampleStatistics(t, s.Snapshot()) } func TestUniformSample(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) + sw := NewUniformSample(100) for i := 0; i < 1000; i++ { - s.Update(int64(i)) + sw.Update(int64(i)) } + s := sw.Snapshot() if size := s.Count(); size != 1000 { t.Errorf("s.Count(): 1000 != %v\n", size) } if size := s.Size(); size != 100 { t.Errorf("s.Size(): 100 != %v\n", size) } - if l := len(s.Values()); l != 100 { + values := s.values + + if l := len(values); l != 100 { t.Errorf("len(s.Values()): 100 != %v\n", l) } - for _, v := range s.Values() { + for _, v := range values { if v > 1000 || v < 0 { - t.Errorf("out of range [0, 100): %v\n", v) + t.Errorf("out of range [0, 1000]: %v\n", v) } } } func TestUniformSampleIncludesTail(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) + sw := NewUniformSample(100) max := 100 for i := 0; i < max; i++ { - s.Update(int64(i)) + sw.Update(int64(i)) } - v := s.Values() + v := sw.Snapshot().values sum := 0 exp := (max - 1) * max / 2 for i := 0; i < len(v); i++ { @@ -243,7 +195,7 @@ func TestUniformSampleIncludesTail(t *testing.T) { } func TestUniformSampleSnapshot(t *testing.T) { - s := NewUniformSample(100) + s := NewUniformSample(100).(*UniformSample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.Update(int64(i)) } @@ -253,29 +205,23 @@ func TestUniformSampleSnapshot(t *testing.T) { } func TestUniformSampleStatistics(t *testing.T) { - rand.Seed(1) - s := NewUniformSample(100) + s := NewUniformSample(100).(*UniformSample).SetRand(rand.New(rand.NewSource(1))) for i := 1; i <= 10000; i++ { s.Update(int64(i)) } - testUniformSampleStatistics(t, s) + testUniformSampleStatistics(t, s.Snapshot()) } func benchmarkSample(b *testing.B, s Sample) { - var memStats runtime.MemStats - runtime.ReadMemStats(&memStats) - pauseTotalNs := memStats.PauseTotalNs - b.ResetTimer() for i := 0; i < b.N; i++ { s.Update(1) } - b.StopTimer() - runtime.GC() - runtime.ReadMemStats(&memStats) - b.Logf("GC cost: %d ns/op", int(memStats.PauseTotalNs-pauseTotalNs)/b.N) } -func testExpDecaySampleStatistics(t *testing.T, s Sample) { +func testExpDecaySampleStatistics(t *testing.T, s *sampleSnapshot) { + if sum := s.Sum(); sum != 496598 { + t.Errorf("s.Sum(): 496598 != %v\n", sum) + } if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } @@ -303,7 +249,7 @@ func testExpDecaySampleStatistics(t *testing.T, s Sample) { } } -func testUniformSampleStatistics(t *testing.T, s Sample) { +func testUniformSampleStatistics(t *testing.T, s *sampleSnapshot) { if count := s.Count(); count != 10000 { t.Errorf("s.Count(): 10000 != %v\n", count) } @@ -326,7 +272,7 @@ func testUniformSampleStatistics(t *testing.T, s Sample) { if ps[1] != 7380.5 { t.Errorf("75th percentile: 7380.5 != %v\n", ps[1]) } - if ps[2] != 9986.429999999998 { + if math.Abs(9986.429999999998-ps[2]) > epsilonPercentile { t.Errorf("99th percentile: 9986.429999999998 != %v\n", ps[2]) } } @@ -345,6 +291,7 @@ func TestUniformSampleConcurrentUpdateCount(t *testing.T) { quit := make(chan struct{}) go func() { t := time.NewTicker(10 * time.Millisecond) + defer t.Stop() for { select { case <-t.C: @@ -356,8 +303,22 @@ func TestUniformSampleConcurrentUpdateCount(t *testing.T) { } }() for i := 0; i < 1000; i++ { - s.Count() + s.Snapshot().Count() time.Sleep(5 * time.Millisecond) } quit <- struct{}{} } + +func BenchmarkCalculatePercentiles(b *testing.B) { + pss := []float64{0.5, 0.75, 0.95, 0.99, 0.999, 0.9999} + var vals []int64 + for i := 0; i < 1000; i++ { + vals = append(vals, int64(rand.Int31())) + } + v := make([]int64, len(vals)) + b.ResetTimer() + for i := 0; i < b.N; i++ { + copy(v, vals) + _ = CalculatePercentiles(v, pss) + } +} diff --git a/metrics/syslog.go b/metrics/syslog.go index a0ed4b1b2364..0bc4ed0da59f 100644 --- a/metrics/syslog.go +++ b/metrics/syslog.go @@ -1,3 +1,4 @@ +//go:build !windows // +build !windows package metrics @@ -14,13 +15,17 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { for range time.Tick(d) { r.Each(func(name string, i interface{}) { switch metric := i.(type) { - case Counter: - w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Count())) - case Gauge: - w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Value())) - case GaugeFloat64: - w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Value())) - case Healthcheck: + case *Counter: + w.Info(fmt.Sprintf("counter %s: count: %d", name, metric.Snapshot().Count())) + case *CounterFloat64: + w.Info(fmt.Sprintf("counter %s: count: %f", name, metric.Snapshot().Count())) + case *Gauge: + w.Info(fmt.Sprintf("gauge %s: value: %d", name, metric.Snapshot().Value())) + case *GaugeFloat64: + w.Info(fmt.Sprintf("gauge %s: value: %f", name, metric.Snapshot().Value())) + case *GaugeInfo: + w.Info(fmt.Sprintf("gauge %s: value: %s", name, metric.Snapshot().Value())) + case *Healthcheck: metric.Check() w.Info(fmt.Sprintf("healthcheck %s: error: %v", name, metric.Error())) case Histogram: @@ -40,7 +45,7 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { ps[3], ps[4], )) - case Meter: + case *Meter: m := metric.Snapshot() w.Info(fmt.Sprintf( "meter %s: count: %d 1-min: %.2f 5-min: %.2f 15-min: %.2f mean: %.2f", @@ -51,7 +56,7 @@ func Syslog(r Registry, d time.Duration, w *syslog.Writer) { m.Rate15(), m.RateMean(), )) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) w.Info(fmt.Sprintf( diff --git a/metrics/testdata/opentsb.want b/metrics/testdata/opentsb.want new file mode 100644 index 000000000000..43fe1b2ac27a --- /dev/null +++ b/metrics/testdata/opentsb.want @@ -0,0 +1,23 @@ +put pre.elite.count 978307200 1337 host=hal9000 +put pre.elite.one-minute 978307200 0.00 host=hal9000 +put pre.elite.five-minute 978307200 0.00 host=hal9000 +put pre.elite.fifteen-minute 978307200 0.00 host=hal9000 +put pre.elite.mean 978307200 0.00 host=hal9000 +put pre.foo.value 978307200 {"chain_id":"5"} host=hal9000 +put pre.months.count 978307200 12 host=hal9000 +put pre.pi.value 978307200 3.140000 host=hal9000 +put pre.second.count 978307200 1 host=hal9000 +put pre.second.min 978307200 1000 host=hal9000 +put pre.second.max 978307200 1000 host=hal9000 +put pre.second.mean 978307200 1000.00 host=hal9000 +put pre.second.std-dev 978307200 0.00 host=hal9000 +put pre.second.50-percentile 978307200 1000.00 host=hal9000 +put pre.second.75-percentile 978307200 1000.00 host=hal9000 +put pre.second.95-percentile 978307200 1000.00 host=hal9000 +put pre.second.99-percentile 978307200 1000.00 host=hal9000 +put pre.second.999-percentile 978307200 1000.00 host=hal9000 +put pre.second.one-minute 978307200 0.00 host=hal9000 +put pre.second.five-minute 978307200 0.00 host=hal9000 +put pre.second.fifteen-minute 978307200 0.00 host=hal9000 +put pre.second.mean-rate 978307200 0.00 host=hal9000 +put pre.tau.count 978307200 1.570000 host=hal9000 diff --git a/metrics/timer.go b/metrics/timer.go index 89e22208fde0..9df15c967aba 100644 --- a/metrics/timer.go +++ b/metrics/timer.go @@ -5,55 +5,30 @@ import ( "time" ) -// Timers capture the duration and rate of events. -type Timer interface { - Count() int64 - Max() int64 - Mean() float64 - Min() int64 - Percentile(float64) float64 - Percentiles([]float64) []float64 - Rate1() float64 - Rate5() float64 - Rate15() float64 - RateMean() float64 - Snapshot() Timer - StdDev() float64 - Stop() - Sum() int64 - Time(func()) - Update(time.Duration) - UpdateSince(time.Time) - Variance() float64 -} - // GetOrRegisterTimer returns an existing Timer or constructs and registers a -// new StandardTimer. +// new Timer. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func GetOrRegisterTimer(name string, r Registry) Timer { +func GetOrRegisterTimer(name string, r Registry) *Timer { if nil == r { r = DefaultRegistry } - return r.GetOrRegister(name, NewTimer).(Timer) + return r.GetOrRegister(name, NewTimer).(*Timer) } -// NewCustomTimer constructs a new StandardTimer from a Histogram and a Meter. +// NewCustomTimer constructs a new Timer from a Histogram and a Meter. // Be sure to call Stop() once the timer is of no use to allow for garbage collection. -func NewCustomTimer(h Histogram, m Meter) Timer { - if !Enabled { - return NilTimer{} - } - return &StandardTimer{ +func NewCustomTimer(h Histogram, m *Meter) *Timer { + return &Timer{ histogram: h, meter: m, } } -// NewRegisteredTimer constructs and registers a new StandardTimer. +// NewRegisteredTimer constructs and registers a new Timer. // Be sure to unregister the meter from the registry once it is of no use to // allow for garbage collection. -func NewRegisteredTimer(name string, r Registry) Timer { +func NewRegisteredTimer(name string, r Registry) *Timer { c := NewTimer() if nil == r { r = DefaultRegistry @@ -62,196 +37,62 @@ func NewRegisteredTimer(name string, r Registry) Timer { return c } -// NewTimer constructs a new StandardTimer using an exponentially-decaying +// NewTimer constructs a new Timer using an exponentially-decaying // sample with the same reservoir size and alpha as UNIX load averages. // Be sure to call Stop() once the timer is of no use to allow for garbage collection. -func NewTimer() Timer { - if !Enabled { - return NilTimer{} - } - return &StandardTimer{ +func NewTimer() *Timer { + return &Timer{ histogram: NewHistogram(NewExpDecaySample(1028, 0.015)), meter: NewMeter(), } } -// NilTimer is a no-op Timer. -type NilTimer struct { - h Histogram - m Meter -} - -// Count is a no-op. -func (NilTimer) Count() int64 { return 0 } - -// Max is a no-op. -func (NilTimer) Max() int64 { return 0 } - -// Mean is a no-op. -func (NilTimer) Mean() float64 { return 0.0 } - -// Min is a no-op. -func (NilTimer) Min() int64 { return 0 } - -// Percentile is a no-op. -func (NilTimer) Percentile(p float64) float64 { return 0.0 } - -// Percentiles is a no-op. -func (NilTimer) Percentiles(ps []float64) []float64 { - return make([]float64, len(ps)) -} - -// Rate1 is a no-op. -func (NilTimer) Rate1() float64 { return 0.0 } - -// Rate5 is a no-op. -func (NilTimer) Rate5() float64 { return 0.0 } - -// Rate15 is a no-op. -func (NilTimer) Rate15() float64 { return 0.0 } - -// RateMean is a no-op. -func (NilTimer) RateMean() float64 { return 0.0 } - -// Snapshot is a no-op. -func (NilTimer) Snapshot() Timer { return NilTimer{} } - -// StdDev is a no-op. -func (NilTimer) StdDev() float64 { return 0.0 } - -// Stop is a no-op. -func (NilTimer) Stop() {} - -// Sum is a no-op. -func (NilTimer) Sum() int64 { return 0 } - -// Time is a no-op. -func (NilTimer) Time(func()) {} - -// Update is a no-op. -func (NilTimer) Update(time.Duration) {} - -// UpdateSince is a no-op. -func (NilTimer) UpdateSince(time.Time) {} - -// Variance is a no-op. -func (NilTimer) Variance() float64 { return 0.0 } - -// StandardTimer is the standard implementation of a Timer and uses a Histogram -// and Meter. -type StandardTimer struct { +// Timer captures the duration and rate of events, using a Histogram and a Meter. +type Timer struct { histogram Histogram - meter Meter + meter *Meter mutex sync.Mutex } -// Count returns the number of events recorded. -func (t *StandardTimer) Count() int64 { - return t.histogram.Count() -} - -// Max returns the maximum value in the sample. -func (t *StandardTimer) Max() int64 { - return t.histogram.Max() -} - -// Mean returns the mean of the values in the sample. -func (t *StandardTimer) Mean() float64 { - return t.histogram.Mean() -} - -// Min returns the minimum value in the sample. -func (t *StandardTimer) Min() int64 { - return t.histogram.Min() -} - -// Percentile returns an arbitrary percentile of the values in the sample. -func (t *StandardTimer) Percentile(p float64) float64 { - return t.histogram.Percentile(p) -} - -// Percentiles returns a slice of arbitrary percentiles of the values in the -// sample. -func (t *StandardTimer) Percentiles(ps []float64) []float64 { - return t.histogram.Percentiles(ps) -} - -// Rate1 returns the one-minute moving average rate of events per second. -func (t *StandardTimer) Rate1() float64 { - return t.meter.Rate1() -} - -// Rate5 returns the five-minute moving average rate of events per second. -func (t *StandardTimer) Rate5() float64 { - return t.meter.Rate5() -} - -// Rate15 returns the fifteen-minute moving average rate of events per second. -func (t *StandardTimer) Rate15() float64 { - return t.meter.Rate15() -} - -// RateMean returns the meter's mean rate of events per second. -func (t *StandardTimer) RateMean() float64 { - return t.meter.RateMean() -} - // Snapshot returns a read-only copy of the timer. -func (t *StandardTimer) Snapshot() Timer { +func (t *Timer) Snapshot() *TimerSnapshot { t.mutex.Lock() defer t.mutex.Unlock() return &TimerSnapshot{ - histogram: t.histogram.Snapshot().(*HistogramSnapshot), - meter: t.meter.Snapshot().(*MeterSnapshot), + histogram: t.histogram.Snapshot(), + meter: t.meter.Snapshot(), } } -// StdDev returns the standard deviation of the values in the sample. -func (t *StandardTimer) StdDev() float64 { - return t.histogram.StdDev() -} - // Stop stops the meter. -func (t *StandardTimer) Stop() { +func (t *Timer) Stop() { t.meter.Stop() } -// Sum returns the sum in the sample. -func (t *StandardTimer) Sum() int64 { - return t.histogram.Sum() -} - -// Record the duration of the execution of the given function. -func (t *StandardTimer) Time(f func()) { +// Time record the duration of the execution of the given function. +func (t *Timer) Time(f func()) { ts := time.Now() f() t.Update(time.Since(ts)) } -// Record the duration of an event. -func (t *StandardTimer) Update(d time.Duration) { - t.mutex.Lock() - defer t.mutex.Unlock() - t.histogram.Update(int64(d)) - t.meter.Mark(1) -} - -// Record the duration of an event that started at a time and ends now. -func (t *StandardTimer) UpdateSince(ts time.Time) { +// Update the duration of an event, in nanoseconds. +func (t *Timer) Update(d time.Duration) { t.mutex.Lock() defer t.mutex.Unlock() - t.histogram.Update(int64(time.Since(ts))) + t.histogram.Update(d.Nanoseconds()) t.meter.Mark(1) } -// Variance returns the variance of the values in the sample. -func (t *StandardTimer) Variance() float64 { - return t.histogram.Variance() +// UpdateSince update the duration of an event that started at a time and ends now. +// The record uses nanoseconds. +func (t *Timer) UpdateSince(ts time.Time) { + t.Update(time.Since(ts)) } // TimerSnapshot is a read-only copy of another Timer. type TimerSnapshot struct { - histogram *HistogramSnapshot + histogram HistogramSnapshot meter *MeterSnapshot } @@ -262,6 +103,9 @@ func (t *TimerSnapshot) Count() int64 { return t.histogram.Count() } // Max returns the maximum value at the time the snapshot was taken. func (t *TimerSnapshot) Max() int64 { return t.histogram.Max() } +// Size returns the size of the sample at the time the snapshot was taken. +func (t *TimerSnapshot) Size() int { return t.histogram.Size() } + // Mean returns the mean value at the time the snapshot was taken. func (t *TimerSnapshot) Mean() float64 { return t.histogram.Mean() } @@ -296,34 +140,13 @@ func (t *TimerSnapshot) Rate15() float64 { return t.meter.Rate15() } // snapshot was taken. func (t *TimerSnapshot) RateMean() float64 { return t.meter.RateMean() } -// Snapshot returns the snapshot. -func (t *TimerSnapshot) Snapshot() Timer { return t } - // StdDev returns the standard deviation of the values at the time the snapshot // was taken. func (t *TimerSnapshot) StdDev() float64 { return t.histogram.StdDev() } -// Stop is a no-op. -func (t *TimerSnapshot) Stop() {} - // Sum returns the sum at the time the snapshot was taken. func (t *TimerSnapshot) Sum() int64 { return t.histogram.Sum() } -// Time panics. -func (*TimerSnapshot) Time(func()) { - panic("Time called on a TimerSnapshot") -} - -// Update panics. -func (*TimerSnapshot) Update(time.Duration) { - panic("Update called on a TimerSnapshot") -} - -// UpdateSince panics. -func (*TimerSnapshot) UpdateSince(time.Time) { - panic("UpdateSince called on a TimerSnapshot") -} - // Variance returns the variance of the values at the time the snapshot was // taken. func (t *TimerSnapshot) Variance() float64 { return t.histogram.Variance() } diff --git a/metrics/timer_test.go b/metrics/timer_test.go index d3750e4f9fa3..0afa63a23072 100644 --- a/metrics/timer_test.go +++ b/metrics/timer_test.go @@ -18,7 +18,7 @@ func BenchmarkTimer(b *testing.B) { func TestGetOrRegisterTimer(t *testing.T) { r := NewRegistry() NewRegisteredTimer("foo", r).Update(47) - if tm := GetOrRegisterTimer("foo", r); tm.Count() != 1 { + if tm := GetOrRegisterTimer("foo", r).Snapshot(); tm.Count() != 1 { t.Fatal(tm) } } @@ -27,7 +27,7 @@ func TestTimerExtremes(t *testing.T) { tm := NewTimer() tm.Update(math.MaxInt64) tm.Update(0) - if stdDev := tm.StdDev(); stdDev != 4.611686018427388e+18 { + if stdDev := tm.Snapshot().StdDev(); stdDev != 4.611686018427388e+18 { t.Errorf("tm.StdDev(): 4.611686018427388e+18 != %v\n", stdDev) } } @@ -45,15 +45,28 @@ func TestTimerStop(t *testing.T) { } func TestTimerFunc(t *testing.T) { - tm := NewTimer() - tm.Time(func() { time.Sleep(50e6) }) - if max := tm.Max(); 35e6 > max || max > 95e6 { - t.Errorf("tm.Max(): 35e6 > %v || %v > 95e6\n", max, max) + var ( + tm = NewTimer() + testStart = time.Now() + actualTime time.Duration + ) + tm.Time(func() { + time.Sleep(50 * time.Millisecond) + actualTime = time.Since(testStart) + }) + var ( + drift = time.Millisecond * 2 + measured = time.Duration(tm.Snapshot().Max()) + ceil = actualTime + drift + floor = actualTime - drift + ) + if measured > ceil || measured < floor { + t.Errorf("tm.Max(): %v > %v || %v > %v\n", measured, ceil, measured, floor) } } func TestTimerZero(t *testing.T) { - tm := NewTimer() + tm := NewTimer().Snapshot() if count := tm.Count(); count != 0 { t.Errorf("tm.Count(): 0 != %v\n", count) } @@ -97,5 +110,5 @@ func ExampleGetOrRegisterTimer() { m := "account.create.latency" t := GetOrRegisterTimer(m, nil) t.Update(47) - fmt.Println(t.Max()) // Output: 47 + fmt.Println(t.Snapshot().Max()) // Output: 47 } diff --git a/metrics/writer.go b/metrics/writer.go index 88521a80d9d7..2a41f8e1fe36 100644 --- a/metrics/writer.go +++ b/metrics/writer.go @@ -3,7 +3,8 @@ package metrics import ( "fmt" "io" - "sort" + "slices" + "strings" "time" ) @@ -18,24 +19,29 @@ func Write(r Registry, d time.Duration, w io.Writer) { // WriteOnce sorts and writes metrics in the given registry to the given // io.Writer. func WriteOnce(r Registry, w io.Writer) { - var namedMetrics namedMetricSlice + var namedMetrics []namedMetric r.Each(func(name string, i interface{}) { namedMetrics = append(namedMetrics, namedMetric{name, i}) }) - - sort.Sort(namedMetrics) + slices.SortFunc(namedMetrics, namedMetric.cmp) for _, namedMetric := range namedMetrics { switch metric := namedMetric.m.(type) { - case Counter: + case *Counter: + fmt.Fprintf(w, "counter %s\n", namedMetric.name) + fmt.Fprintf(w, " count: %9d\n", metric.Snapshot().Count()) + case *CounterFloat64: fmt.Fprintf(w, "counter %s\n", namedMetric.name) - fmt.Fprintf(w, " count: %9d\n", metric.Count()) - case Gauge: + fmt.Fprintf(w, " count: %f\n", metric.Snapshot().Count()) + case *Gauge: + fmt.Fprintf(w, "gauge %s\n", namedMetric.name) + fmt.Fprintf(w, " value: %9d\n", metric.Snapshot().Value()) + case *GaugeFloat64: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) - fmt.Fprintf(w, " value: %9d\n", metric.Value()) - case GaugeFloat64: + fmt.Fprintf(w, " value: %f\n", metric.Snapshot().Value()) + case *GaugeInfo: fmt.Fprintf(w, "gauge %s\n", namedMetric.name) - fmt.Fprintf(w, " value: %f\n", metric.Value()) - case Healthcheck: + fmt.Fprintf(w, " value: %s\n", metric.Snapshot().Value().String()) + case *Healthcheck: metric.Check() fmt.Fprintf(w, "healthcheck %s\n", namedMetric.name) fmt.Fprintf(w, " error: %v\n", metric.Error()) @@ -53,7 +59,7 @@ func WriteOnce(r Registry, w io.Writer) { fmt.Fprintf(w, " 95%%: %12.2f\n", ps[2]) fmt.Fprintf(w, " 99%%: %12.2f\n", ps[3]) fmt.Fprintf(w, " 99.9%%: %12.2f\n", ps[4]) - case Meter: + case *Meter: m := metric.Snapshot() fmt.Fprintf(w, "meter %s\n", namedMetric.name) fmt.Fprintf(w, " count: %9d\n", m.Count()) @@ -61,7 +67,7 @@ func WriteOnce(r Registry, w io.Writer) { fmt.Fprintf(w, " 5-min rate: %12.2f\n", m.Rate5()) fmt.Fprintf(w, " 15-min rate: %12.2f\n", m.Rate15()) fmt.Fprintf(w, " mean rate: %12.2f\n", m.RateMean()) - case Timer: + case *Timer: t := metric.Snapshot() ps := t.Percentiles([]float64{0.5, 0.75, 0.95, 0.99, 0.999}) fmt.Fprintf(w, "timer %s\n", namedMetric.name) @@ -88,13 +94,6 @@ type namedMetric struct { m interface{} } -// namedMetricSlice is a slice of namedMetrics that implements sort.Interface. -type namedMetricSlice []namedMetric - -func (nms namedMetricSlice) Len() int { return len(nms) } - -func (nms namedMetricSlice) Swap(i, j int) { nms[i], nms[j] = nms[j], nms[i] } - -func (nms namedMetricSlice) Less(i, j int) bool { - return nms[i].name < nms[j].name +func (m namedMetric) cmp(other namedMetric) int { + return strings.Compare(m.name, other.name) } diff --git a/metrics/writer_test.go b/metrics/writer_test.go index 1aacc287121b..edcfe955abcf 100644 --- a/metrics/writer_test.go +++ b/metrics/writer_test.go @@ -1,19 +1,19 @@ package metrics import ( - "sort" + "slices" "testing" ) func TestMetricsSorting(t *testing.T) { - var namedMetrics = namedMetricSlice{ + var namedMetrics = []namedMetric{ {name: "zzz"}, {name: "bbb"}, {name: "fff"}, {name: "ggg"}, } - sort.Sort(namedMetrics) + slices.SortFunc(namedMetrics, namedMetric.cmp) for i, name := range []string{"bbb", "fff", "ggg", "zzz"} { if namedMetrics[i].name != name { t.Fail() diff --git a/miner/unconfirmed_test.go b/miner/unconfirmed_test.go index 71a0dde931a5..781ba0636651 100644 --- a/miner/unconfirmed_test.go +++ b/miner/unconfirmed_test.go @@ -72,7 +72,7 @@ func TestUnconfirmedShifts(t *testing.T) { if n := pool.blocks.Len(); n != int(limit)/2 { t.Errorf("unconfirmed count mismatch: have %d, want %d", n, limit/2) } - // Try to shift all the remaining blocks out and verify emptyness + // Try to shift all the remaining blocks out and verify emptiness pool.Shift(start + 2*uint64(limit)) if n := pool.blocks.Len(); n != 0 { t.Errorf("unconfirmed count mismatch: have %d, want %d", n, 0) diff --git a/node/node_test.go b/node/node_test.go index df97c66c71ca..b733ea517492 100644 --- a/node/node_test.go +++ b/node/node_test.go @@ -651,5 +651,6 @@ func doHTTPRequest(t *testing.T, req *http.Request) *http.Response { if err != nil { t.Error("could not issue a GET request to the given endpoint", err) } + t.Cleanup(func() { resp.Body.Close() }) return resp } diff --git a/p2p/metrics.go b/p2p/metrics.go index 0f1f42b08a48..febff6c4ece9 100644 --- a/p2p/metrics.go +++ b/p2p/metrics.go @@ -42,7 +42,7 @@ type meteredConn struct { // returns the original object. func newMeteredConn(conn net.Conn, ingress bool) net.Conn { // Short circuit if metrics are disabled - if !metrics.Enabled { + if !metrics.Enabled() { return conn } // Otherwise bump the connection counters and wrap the connection diff --git a/p2p/server.go b/p2p/server.go index fabbb9cdbfcd..51579ff7c30c 100644 --- a/p2p/server.go +++ b/p2p/server.go @@ -650,9 +650,7 @@ running: // This channel is used by RemoveTrustedPeer to remove an enode // from the trusted node set. srv.log.Trace("Removing trusted node", "node", n) - if _, ok := trusted[n.ID]; ok { - delete(trusted, n.ID) - } + delete(trusted, n.ID) // Unmark any already-connected peer as trusted if p, ok := peers[n.ID]; ok { p.rw.set(trustedConn, false) diff --git a/p2p/simulations/mocker_test.go b/p2p/simulations/mocker_test.go index 32b566ecee7e..8cc84f8e83b8 100644 --- a/p2p/simulations/mocker_test.go +++ b/p2p/simulations/mocker_test.go @@ -127,6 +127,7 @@ func TestMocker(t *testing.T) { if err != nil { t.Fatalf("Could not start mocker: %s", err) } + resp.Body.Close() if resp.StatusCode != 200 { t.Fatalf("Invalid Status Code received for starting mocker, expected 200, got %d", resp.StatusCode) } @@ -148,15 +149,17 @@ func TestMocker(t *testing.T) { if err != nil { t.Fatalf("Could not stop mocker: %s", err) } + resp.Body.Close() if resp.StatusCode != 200 { t.Fatalf("Invalid Status Code received for stopping mocker, expected 200, got %d", resp.StatusCode) } //reset the network - _, err = http.Post(s.URL+"/reset", "", nil) + resp, err = http.Post(s.URL+"/reset", "", nil) if err != nil { t.Fatalf("Could not reset network: %s", err) } + resp.Body.Close() //now the number of nodes in the network should be zero nodes_info, err = client.GetNodes() diff --git a/rpc/handler.go b/rpc/handler.go index 0e407abec900..777e47e7c88f 100644 --- a/rpc/handler.go +++ b/rpc/handler.go @@ -340,12 +340,12 @@ func (h *handler) handleCall(cp *callProc, msg *jsonrpcMessage) *jsonrpcMessage if callb != h.unsubscribeCb { rpcRequestGauge.Inc(1) if answer.Error != nil { - failedReqeustGauge.Inc(1) + failedRequestGauge.Inc(1) } else { successfulRequestGauge.Inc(1) } rpcServingTimer.UpdateSince(start) - newRPCServingTimer(msg.Method, answer.Error == nil).UpdateSince(start) + updateServeTimeHistogram(msg.Method, answer.Error == nil, time.Since(start)) } return answer } diff --git a/rpc/http_test.go b/rpc/http_test.go index 73920b44f09a..73124fb3508a 100644 --- a/rpc/http_test.go +++ b/rpc/http_test.go @@ -92,6 +92,7 @@ func confirmHTTPRequestYieldsStatusCode(t *testing.T, method, contentType, body if err != nil { t.Fatalf("request failed: %v", err) } + resp.Body.Close() confirmStatusCode(t, resp.StatusCode, expectedStatusCode) } diff --git a/rpc/metrics.go b/rpc/metrics.go index ebb407fa3dad..ea8837f6664b 100644 --- a/rpc/metrics.go +++ b/rpc/metrics.go @@ -18,6 +18,7 @@ package rpc import ( "fmt" + "time" "github.com/XinFinOrg/XDPoSChain/metrics" ) @@ -25,15 +26,25 @@ import ( var ( rpcRequestGauge = metrics.NewRegisteredGauge("rpc/requests", nil) successfulRequestGauge = metrics.NewRegisteredGauge("rpc/success", nil) - failedReqeustGauge = metrics.NewRegisteredGauge("rpc/failure", nil) - rpcServingTimer = metrics.NewRegisteredTimer("rpc/duration/all", nil) + failedRequestGauge = metrics.NewRegisteredGauge("rpc/failure", nil) + + // serveTimeHistName is the prefix of the per-request serving time histograms. + serveTimeHistName = "rpc/duration" + + rpcServingTimer = metrics.NewRegisteredTimer("rpc/duration/all", nil) ) -func newRPCServingTimer(method string, valid bool) metrics.Timer { - flag := "success" - if !valid { - flag = "failure" +// updateServeTimeHistogram tracks the serving time of a remote RPC call. +func updateServeTimeHistogram(method string, success bool, elapsed time.Duration) { + note := "success" + if !success { + note = "failure" + } + h := fmt.Sprintf("%s/%s/%s", serveTimeHistName, method, note) + sampler := func() metrics.Sample { + return metrics.ResettingSample( + metrics.NewExpDecaySample(1028, 0.015), + ) } - m := fmt.Sprintf("rpc/duration/%s/%s", method, flag) - return metrics.GetOrRegisterTimer(m, nil) + metrics.GetOrRegisterHistogramLazy(h, nil, sampler).Update(elapsed.Nanoseconds()) }