Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

services/horizon: Fixup db metrics #3605

Merged
merged 4 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions services/horizon/internal/db2/history/orderbook.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math/big"

"github.com/stellar/go/amount"
"github.com/stellar/go/support/db"
"github.com/stellar/go/support/errors"
"github.com/stellar/go/xdr"
)
Expand Down Expand Up @@ -101,6 +102,8 @@ func (q *Q) GetOrderBookSummary(ctx context.Context, sellingAsset, buyingAsset x
LIMIT $3
)
`
// Add explicit query type for Prometheus metrics, since we use raw SQL.
ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType)
paulbellamy marked this conversation as resolved.
Show resolved Hide resolved
err = q.SelectRaw(ctx, &levels, selectPriceLevels, selling, buying, maxPriceLevels)
if err != nil {
return result, errors.Wrap(err, "cannot select price levels")
Expand Down
3 changes: 3 additions & 0 deletions services/horizon/internal/db2/history/trade.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
sq "github.com/Masterminds/squirrel"

"github.com/stellar/go/services/horizon/internal/db2"
"github.com/stellar/go/support/db"
"github.com/stellar/go/support/errors"
"github.com/stellar/go/xdr"
)
Expand Down Expand Up @@ -195,6 +196,8 @@ func (q *TradesQ) Select(ctx context.Context, dest interface{}) error {
return errors.New("TradesQ.Page call is required before calling Select")
}

// Add explicit query type for Prometheus metrics, since we use raw SQL.
ctx = context.WithValue(ctx, &db.QueryTypeContextKey, db.SelectQueryType)
if q.rawSQL != "" {
q.Err = q.parent.SelectRaw(ctx, dest, q.rawSQL, q.rawArgs...)
} else {
Expand Down
10 changes: 5 additions & 5 deletions services/horizon/internal/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ import (
"github.com/stellar/go/support/log"
)

func mustNewDBSession(subsystem db.Subsystem, databaseURL string, maxIdle, maxOpen int, registry *prometheus.Registry) db.SessionInterface {
func mustNewDBSession(subservice db.Subservice, databaseURL string, maxIdle, maxOpen int, registry *prometheus.Registry) db.SessionInterface {
session, err := db.Open("postgres", databaseURL)
if err != nil {
log.Fatalf("cannot open Horizon DB: %v", err)
}

session.DB.SetMaxIdleConns(maxIdle)
session.DB.SetMaxOpenConns(maxOpen)
return db.RegisterMetrics(session, "horizon", subsystem, registry)
return db.RegisterMetrics(session, "horizon", subservice, registry)
}

func mustInitHorizonDB(app *App) {
Expand All @@ -44,7 +44,7 @@ func mustInitHorizonDB(app *App) {
}

app.historyQ = &history.Q{mustNewDBSession(
db.HistorySubsystem,
db.HistorySubservice,
app.config.DatabaseURL,
maxIdle,
maxOpen,
Expand All @@ -57,12 +57,12 @@ func initIngester(app *App) {
var coreSession db.SessionInterface
if !app.config.EnableCaptiveCoreIngestion {
coreSession = mustNewDBSession(
db.CoreSubsystem, app.config.StellarCoreDatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry)
db.CoreSubservice, app.config.StellarCoreDatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry)
}
app.ingester, err = ingest.NewSystem(ingest.Config{
CoreSession: coreSession,
HistorySession: mustNewDBSession(
db.IngestSubsystem, app.config.DatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry,
db.IngestSubservice, app.config.DatabaseURL, ingest.MaxDBConnections, ingest.MaxDBConnections, app.prometheusRegistry,
),
NetworkPassphrase: app.config.NetworkPassphrase,
// TODO:
Expand Down
90 changes: 65 additions & 25 deletions support/db/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@ type CtxKey string
var RouteContextKey = CtxKey("route")
var QueryTypeContextKey = CtxKey("query_type")

type Subsystem string
type Subservice string

var CoreSubsystem = Subsystem("core")
var HistorySubsystem = Subsystem("history")
var IngestSubsystem = Subsystem("ingest")
var CoreSubservice = Subservice("core")
var HistorySubservice = Subservice("history")
var IngestSubservice = Subservice("ingest")

type QueryType string

Expand Down Expand Up @@ -55,43 +55,53 @@ type SessionWithMetrics struct {
maxLifetimeClosedCounter prometheus.CounterFunc
}

func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *prometheus.Registry) SessionInterface {
subsystem := fmt.Sprintf("db_%s", sub)
func RegisterMetrics(base *Session, namespace string, sub Subservice, registry *prometheus.Registry) SessionInterface {
s := &SessionWithMetrics{
SessionInterface: base,
registry: registry,
}

s.queryCounter = prometheus.NewCounterVec(
prometheus.CounterOpts{Namespace: namespace, Subsystem: subsystem, Name: "query_total"},
prometheus.CounterOpts{
Namespace: namespace,
Subsystem: "db",
Name: "query_total",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
[]string{"query_type", "error", "route"},
)
registry.MustRegister(s.queryCounter)

s.queryDurationSummary = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace, Subsystem: subsystem,
Name: "query_duration_seconds",
Namespace: namespace, Subsystem: "db",
Name: "query_duration_seconds",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
[]string{"query_type", "error", "route"},
)
registry.MustRegister(s.queryDurationSummary)

// txnCounter: prometheus.NewCounter(
// prometheus.CounterOpts{Namespace: namespace, Subsystem: subsystem, Name: "transaction_total"},
// prometheus.CounterOpts{Namespace: namespace, Subsystem: "db", Name: "transaction_total"},
// ),
// registry.MustRegister(s.txnCounter)
// txnDuration: prometheus.NewHistogram(
// prometheus.HistogramOpts{
// Namespace: namespace, Subsystem: subsystem,
// Namespace: namespace, Subsystem: "db",
// Name: "transaction_duration_seconds",
// Buckets: prometheus.ExponentialBuckets(0.1, 3, 5),
// },
// ),
// registry.MustRegister(s.txnDuration)

s.maxOpenConnectionsGauge = prometheus.NewGaugeFunc(
prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "max_open_connections"},
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "db",
Name: "max_open_connections",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
// Right now MaxOpenConnections in Horizon is static; however, it's possible that
// it will change one day. In such a case, using GaugeFunc is very cheap and will
Expand All @@ -102,23 +112,38 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p
registry.MustRegister(s.maxOpenConnectionsGauge)

s.openConnectionsGauge = prometheus.NewGaugeFunc(
prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "open_connections"},
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "db",
Name: "open_connections",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().OpenConnections)
},
)
registry.MustRegister(s.openConnectionsGauge)

s.inUseConnectionsGauge = prometheus.NewGaugeFunc(
prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "in_use_connections"},
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "db",
Name: "in_use_connections",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().InUse)
},
)
registry.MustRegister(s.inUseConnectionsGauge)

s.idleConnectionsGauge = prometheus.NewGaugeFunc(
prometheus.GaugeOpts{Namespace: namespace, Subsystem: subsystem, Name: "idle_connections"},
prometheus.GaugeOpts{
Namespace: namespace,
Subsystem: "db",
Name: "idle_connections",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().Idle)
},
Expand All @@ -127,8 +152,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p

s.waitCountCounter = prometheus.NewCounterFunc(
prometheus.CounterOpts{
Namespace: namespace, Subsystem: subsystem, Name: "wait_count_total",
Help: "total number of number of connections waited for",
Namespace: namespace,
Subsystem: "db",
Name: "wait_count_total",
Help: "total number of number of connections waited for",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().WaitCount)
Expand All @@ -138,8 +166,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p

s.waitDurationCounter = prometheus.NewCounterFunc(
prometheus.CounterOpts{
Namespace: namespace, Subsystem: subsystem, Name: "wait_duration_seconds_total",
Help: "total time blocked waiting for a new connection",
Namespace: namespace,
Subsystem: "db",
Name: "wait_duration_seconds_total",
Help: "total time blocked waiting for a new connection",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return base.DB.Stats().WaitDuration.Seconds()
Expand All @@ -149,8 +180,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p

s.maxIdleClosedCounter = prometheus.NewCounterFunc(
prometheus.CounterOpts{
Namespace: namespace, Subsystem: subsystem, Name: "max_idle_closed_total",
Help: "total number of number of connections closed due to SetMaxIdleConns",
Namespace: namespace,
Subsystem: "db",
Name: "max_idle_closed_total",
Help: "total number of number of connections closed due to SetMaxIdleConns",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().MaxIdleClosed)
Expand All @@ -160,8 +194,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p

s.maxIdleTimeClosedCounter = prometheus.NewCounterFunc(
prometheus.CounterOpts{
Namespace: namespace, Subsystem: subsystem, Name: "max_idle_time_closed_total",
Help: "total number of number of connections closed due to SetConnMaxIdleTime",
Namespace: namespace,
Subsystem: "db",
Name: "max_idle_time_closed_total",
Help: "total number of number of connections closed due to SetConnMaxIdleTime",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().MaxIdleTimeClosed)
Expand All @@ -171,8 +208,11 @@ func RegisterMetrics(base *Session, namespace string, sub Subsystem, registry *p

s.maxLifetimeClosedCounter = prometheus.NewCounterFunc(
prometheus.CounterOpts{
Namespace: namespace, Subsystem: subsystem, Name: "max_lifetime_closed_total",
Help: "total number of number of connections closed due to SetConnMaxLifetime",
Namespace: namespace,
Subsystem: "db",
Name: "max_lifetime_closed_total",
Help: "total number of number of connections closed due to SetConnMaxLifetime",
ConstLabels: prometheus.Labels{"subservice": string(sub)},
},
func() float64 {
return float64(base.DB.Stats().MaxLifetimeClosed)
Expand Down