From b7dcdb6a9c6e2682cd87708f7766e9a0aab5e009 Mon Sep 17 00:00:00 2001 From: Paul Bellamy Date: Tue, 18 May 2021 15:39:52 +0100 Subject: [PATCH 1/4] Change db subsystem to a label to make aggregation easier --- support/db/metrics.go | 80 ++++++++++++++++++++++++++++++++----------- 1 file changed, 60 insertions(+), 20 deletions(-) diff --git a/support/db/metrics.go b/support/db/metrics.go index 1dbfed3d8e..f56b6720c5 100644 --- a/support/db/metrics.go +++ b/support/db/metrics.go @@ -56,34 +56,39 @@ type SessionWithMetrics struct { } func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *prometheus.Registry) SessionInterface { - subsystem := fmt.Sprintf("db_%s", sub) s := &SessionWithMetrics{ SessionInterface: base, registry: registry, } s.queryCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{Namespace: namespace, Subsystem: subsystem, Name: "query_total"}, + prometheus.CounterOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "query_total", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + }, []string{"query_type", "error", "route"}, ) registry.MustRegister(s.queryCounter) s.queryDurationSummary = prometheus.NewSummaryVec( prometheus.SummaryOpts{ - Namespace: namespace, Subsystem: subsystem, - Name: "query_duration_seconds", + Namespace: namespace, Subsystem: "db", + Name: "query_duration_seconds", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, []string{"query_type", "error", "route"}, ) registry.MustRegister(s.queryDurationSummary) // txnCounter: prometheus.NewCounter( - // prometheus.CounterOpts{Namespace: namespace, Subsystem: subsystem, Name: "transaction_total"}, + // prometheus.CounterOpts{Namespace: namespace, Subsystem: "db", Name: "transaction_total"}, // ), // registry.MustRegister(s.txnCounter) // txnDuration: prometheus.NewHistogram( // prometheus.HistogramOpts{ - // Namespace: namespace, Subsystem: subsystem, + // Namespace: namespace, Subsystem: "db", // Name: "transaction_duration_seconds", // Buckets: prometheus.ExponentialBuckets(0.1, 3, 5), // }, @@ -91,7 +96,12 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p // registry.MustRegister(s.txnDuration) s.maxOpenConnectionsGauge = prometheus.NewGaugeFunc( - prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "max_open_connections"}, + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "max_open_connections", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + }, func() float64 { // Right now MaxOpenConnections in Horizon is static however it's possible that // it will change one day. In such case, using GaugeFunc is very cheap and will @@ -102,7 +112,12 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p registry.MustRegister(s.maxOpenConnectionsGauge) s.openConnectionsGauge = prometheus.NewGaugeFunc( - prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "open_connections"}, + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "open_connections", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + }, func() float64 { return float64(base.DB.Stats().OpenConnections) }, @@ -110,7 +125,12 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p registry.MustRegister(s.openConnectionsGauge) s.inUseConnectionsGauge = prometheus.NewGaugeFunc( - prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "in_use_connections"}, + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "in_use_connections", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + }, func() float64 { return float64(base.DB.Stats().InUse) }, @@ -118,7 +138,12 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p registry.MustRegister(s.inUseConnectionsGauge) s.idleConnectionsGauge = prometheus.NewGaugeFunc( - prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "idle_connections"}, + prometheus.GaugeOpts{ + Namespace: namespace, + Subsystem: "db", + Name: "idle_connections", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + }, func() float64 { return float64(base.DB.Stats().Idle) }, @@ -127,8 +152,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p s.waitCountCounter = prometheus.NewCounterFunc( prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, Name: "wait_count_total", - Help: "total number of number of connections waited for", + Namespace: namespace, + Subsystem: "db", + Name: "wait_count_total", + Help: "total number of number of connections waited for", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, func() float64 { return float64(base.DB.Stats().WaitCount) @@ -138,8 +166,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p s.waitDurationCounter = prometheus.NewCounterFunc( prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, Name: "wait_duration_seconds_total", - Help: "total time blocked waiting for a new connection", + Namespace: namespace, + Subsystem: "db", + Name: "wait_duration_seconds_total", + Help: "total time blocked waiting for a new connection", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, func() float64 { return base.DB.Stats().WaitDuration.Seconds() @@ -149,8 +180,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p s.maxIdleClosedCounter = prometheus.NewCounterFunc( prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, Name: "max_idle_closed_total", - Help: "total number of number of connections closed due to SetMaxIdleConns", + Namespace: namespace, + Subsystem: "db", + Name: "max_idle_closed_total", + Help: "total number of number of connections closed due to SetMaxIdleConns", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxIdleClosed) @@ -160,8 +194,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p s.maxIdleTimeClosedCounter = prometheus.NewCounterFunc( prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, Name: "max_idle_time_closed_total", - Help: "total number of number of connections closed due to SetConnMaxIdleTime", + Namespace: namespace, + Subsystem: "db", + Name: "max_idle_time_closed_total", + Help: "total number of number of connections closed due to SetConnMaxIdleTime", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxIdleTimeClosed) @@ -171,8 +208,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p s.maxLifetimeClosedCounter = prometheus.NewCounterFunc( prometheus.CounterOpts{ - Namespace: namespace, Subsystem: subsystem, Name: "max_lifetime_closed_total", - Help: "total number of number of connections closed due to SetConnMaxLifetime", + Namespace: namespace, + Subsystem: "db", + Name: "max_lifetime_closed_total", + Help: "total number of number of connections closed due to SetConnMaxLifetime", + ConstLabels: prometheus.Labels{"subsystem": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxLifetimeClosed) From 05b5fd3b82b3729a8beb1e368dd1fdb2741fab7a Mon Sep 17 00:00:00 2001 From: Paul Bellamy Date: Tue, 18 May 2021 15:52:53 +0100 Subject: [PATCH 2/4] Add explicit query_type to a couple queries --- services/horizon/internal/db2/history/orderbook.go | 2 ++ services/horizon/internal/db2/history/trade.go | 2 ++ 2 files changed, 4 insertions(+) diff --git a/services/horizon/internal/db2/history/orderbook.go b/services/horizon/internal/db2/history/orderbook.go index 4ea068f022..26d5d0e359 100644 --- a/services/horizon/internal/db2/history/orderbook.go +++ b/services/horizon/internal/db2/history/orderbook.go @@ -6,6 +6,7 @@ import ( "math/big" "github.com/stellar/go/amount" + "github.com/stellar/go/support/db" "github.com/stellar/go/support/errors" "github.com/stellar/go/xdr" ) @@ -101,6 +102,7 @@ func (q *Q) GetOrderBookSummary(ctx context.Context, sellingAsset, buyingAsset x LIMIT $3 ) ` + ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType) err = q.SelectRaw(ctx, &levels, selectPriceLevels, selling, buying, maxPriceLevels) if err != nil { return result, errors.Wrap(err, "cannot select price levels") diff --git a/services/horizon/internal/db2/history/trade.go b/services/horizon/internal/db2/history/trade.go index a291d0357e..2f0ad0cb86 100644 --- a/services/horizon/internal/db2/history/trade.go +++ b/services/horizon/internal/db2/history/trade.go @@ -8,6 +8,7 @@ import ( sq "github.com/Masterminds/squirrel" "github.com/stellar/go/services/horizon/internal/db2" + "github.com/stellar/go/support/db" "github.com/stellar/go/support/errors" "github.com/stellar/go/xdr" ) @@ -195,6 +196,7 @@ func (q *TradesQ) Select(ctx context.Context, dest interface{}) error { return errors.New("TradesQ.Page call is required before calling Select") } + ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType) if q.rawSQL != "" { q.Err = q.parent.SelectRaw(ctx, dest, q.rawSQL, q.rawArgs...) } else { From fc8fcdce9ded3fd7a92df3981d1c02e4bfa89f25 Mon Sep 17 00:00:00 2001 From: Paul Bellamy Date: Tue, 18 May 2021 16:19:11 +0100 Subject: [PATCH 3/4] Add comment --- services/horizon/internal/db2/history/orderbook.go | 1 + services/horizon/internal/db2/history/trade.go | 1 + 2 files changed, 2 insertions(+) diff --git a/services/horizon/internal/db2/history/orderbook.go b/services/horizon/internal/db2/history/orderbook.go index 26d5d0e359..3de321a4ac 100644 --- a/services/horizon/internal/db2/history/orderbook.go +++ b/services/horizon/internal/db2/history/orderbook.go @@ -102,6 +102,7 @@ func (q *Q) GetOrderBookSummary(ctx context.Context, sellingAsset, buyingAsset x LIMIT $3 ) ` + // Add explicit query type for prometheus metrics, since we use raw sql. ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType) err = q.SelectRaw(ctx, &levels, selectPriceLevels, selling, buying, maxPriceLevels) if err != nil { diff --git a/services/horizon/internal/db2/history/trade.go b/services/horizon/internal/db2/history/trade.go index 2f0ad0cb86..2cf66cd312 100644 --- a/services/horizon/internal/db2/history/trade.go +++ b/services/horizon/internal/db2/history/trade.go @@ -196,6 +196,7 @@ func (q *TradesQ) Select(ctx context.Context, dest interface{}) error { return errors.New("TradesQ.Page call is required before calling Select") } + // Add explicit query type for prometheus metrics, since we use raw sql. ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType) if q.rawSQL != "" { q.Err = q.parent.SelectRaw(ctx, dest, q.rawSQL, q.rawArgs...) From 2d4ad37eb563696752ab2c60b7623b541e9a42ed Mon Sep 17 00:00:00 2001 From: Paul Bellamy Date: Tue, 18 May 2021 16:35:02 +0100 Subject: [PATCH 4/4] s/subsystem/subservice/g --- services/horizon/internal/init.go | 10 +++++----- support/db/metrics.go | 32 +++++++++++++++---------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/services/horizon/internal/init.go b/services/horizon/internal/init.go index 5665e125e0..f776081dd2 100644 --- a/services/horizon/internal/init.go +++ b/services/horizon/internal/init.go @@ -18,7 +18,7 @@ import ( "github.com/stellar/go/support/log" ) -func mustNewDBSession(subsystem db.Subsystem, databaseURL string, maxIdle, maxOpen int, registry *prometheus.Registry) db.SessionInterface { +func mustNewDBSession(subservice db.Subservice, databaseURL string, maxIdle, maxOpen int, registry *prometheus.Registry) db.SessionInterface { session, err := db.Open("postgres", databaseURL) if err != nil { log.Fatalf("cannot open Horizon DB: %v", err) @@ -26,7 +26,7 @@ func mustNewDBSession(subsystem db.Subsystem, databaseURL string, maxIdle, maxOp session.DB.SetMaxIdleConns(maxIdle) session.DB.SetMaxOpenConns(maxOpen) - return db.RegisterMetrics(session, "horizon", subsystem, registry) + return db.RegisterMetrics(session, "horizon", subservice, registry) } func mustInitHorizonDB(app *App) { @@ -44,7 +44,7 @@ func mustInitHorizonDB(app *App) { } app.historyQ = &history.Q{mustNewDBSession( - db.HistorySubsystem, + db.HistorySubservice, app.config.DatabaseURL, maxIdle, maxOpen, @@ -57,12 +57,12 @@ func initIngester(app *App) { var coreSession db.SessionInterface if !app.config.EnableCaptiveCoreIngestion { coreSession = mustNewDBSession( - db.CoreSubsystem, app.config.StellarCoreDatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry) + db.CoreSubservice, app.config.StellarCoreDatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry) } app.ingester, err = ingest.NewSystem(ingest.Config{ CoreSession: coreSession, HistorySession: mustNewDBSession( - db.IngestSubsystem, app.config.DatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry, + db.IngestSubservice, app.config.DatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry, ), NetworkPassphrase: app.config.NetworkPassphrase, // TODO: diff --git a/support/db/metrics.go b/support/db/metrics.go index f56b6720c5..1733d2d1d7 100644 --- a/support/db/metrics.go +++ b/support/db/metrics.go @@ -16,11 +16,11 @@ type CtxKey string var RouteContextKey = CtxKey("route") var QueryTypeContextKey = CtxKey("query_type") -type Subsystem string +type Subservice string -var CoreSubsystem = Subsystem("core") -var HistorySubsystem = Subsystem("history") -var IngestSubsystem = Subsystem("ingest") +var CoreSubservice = Subservice("core") +var HistorySubservice = Subservice("history") +var IngestSubservice = Subservice("ingest") type QueryType string @@ -55,7 +55,7 @@ type SessionWithMetrics struct { maxLifetimeClosedCounter prometheus.CounterFunc } -func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *prometheus.Registry) SessionInterface { +func RegisterMetrics(base *Session, namespace string, sub Subservice, registry *prometheus.Registry) SessionInterface { s := &SessionWithMetrics{ SessionInterface: base, registry: registry, @@ -66,7 +66,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Namespace: namespace, Subsystem: "db", Name: "query_total", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, []string{"query_type", "error", "route"}, ) @@ -76,7 +76,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p prometheus.SummaryOpts{ Namespace: namespace, Subsystem: "db", Name: "query_duration_seconds", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, []string{"query_type", "error", "route"}, ) @@ -100,7 +100,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Namespace: namespace, Subsystem: "db", Name: "max_open_connections", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { // Right now MaxOpenConnections in Horizon is static however it's possible that @@ -116,7 +116,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Namespace: namespace, Subsystem: "db", Name: "open_connections", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().OpenConnections) @@ -129,7 +129,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Namespace: namespace, Subsystem: "db", Name: "in_use_connections", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().InUse) @@ -142,7 +142,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Namespace: namespace, Subsystem: "db", Name: "idle_connections", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().Idle) @@ -156,7 +156,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Subsystem: "db", Name: "wait_count_total", Help: "total number of number of connections waited for", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().WaitCount) @@ -170,7 +170,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Subsystem: "db", Name: "wait_duration_seconds_total", Help: "total time blocked waiting for a new connection", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return base.DB.Stats().WaitDuration.Seconds() @@ -184,7 +184,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Subsystem: "db", Name: "max_idle_closed_total", Help: "total number of number of connections closed due to SetMaxIdleConns", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxIdleClosed) @@ -198,7 +198,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Subsystem: "db", Name: "max_idle_time_closed_total", Help: "total number of number of connections closed due to SetConnMaxIdleTime", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxIdleTimeClosed) @@ -212,7 +212,7 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p Subsystem: "db", Name: "max_lifetime_closed_total", Help: "total number of number of connections closed due to SetConnMaxLifetime", - ConstLabels: prometheus.Labels{"subsystem": string(sub)}, + ConstLabels: prometheus.Labels{"subservice": string(sub)}, }, func() float64 { return float64(base.DB.Stats().MaxLifetimeClosed)