Skip to content

Commit

Permalink
Fix shared metrics between requests (#722)
Browse files Browse the repository at this point in the history
## Breaking changes

### dropped metrics
* mysql_exporter_scrapes_total 
* mysql_exporter_scrape_errors_total
* mysql_last_scrape_failed 

### new metrics
* mysql_exporter_collector_success (replace mysql_exporter_scrape_errors_total)

### metrics isolated between requests
* mysql_up
* mysql_exporter_collector_success
* mysql_exporter_collector_duration_seconds

## Why is this necessary
We are using mysqld_exporter in the `multi-target` mode, which means one exporter serves multiple MySQL instances. The crucial metric `mysql_up` must be isolated between requests; otherwise there will be misunderstandings, and some MySQL instances would be shown as down when in fact they are not.

All shared metrics are dropped, especially `mysql_exporter_scrapes_total` and `mysql_exporter_scrape_errors_total`. These metrics are no longer useful, since Prometheus itself can record almost everything, including scrape failures for each collector.

`mysql_last_scrape_failed` is covered by `mysql_exporter_collector_success`, so it is also dropped.

fixes #713 

---------

Signed-off-by: qizhicheng <qizhicheng@douban.com>
  • Loading branch information
LeoQuote authored Apr 11, 2023
1 parent fad2c0c commit 6ad9f25
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 70 deletions.
89 changes: 26 additions & 63 deletions collector/exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,21 @@ var (
).Default("false").Bool()
)

// Metric descriptors.
// metric definition
var (
scrapeDurationDesc = prometheus.NewDesc(
mysqlUp = prometheus.NewDesc(
prometheus.BuildFQName(namespace, "", "up"),
"Whether the MySQL server is up.",
nil,
nil,
)
mysqlScrapeCollectorSuccess = prometheus.NewDesc(
prometheus.BuildFQName(namespace, exporter, "collector_success"),
"mysqld_exporter: Whether a collector succeeded.",
[]string{"collector"},
nil,
)
mysqlScrapeDurationSeconds = prometheus.NewDesc(
prometheus.BuildFQName(namespace, exporter, "collector_duration_seconds"),
"Collector time duration.",
[]string{"collector"}, nil,
Expand All @@ -80,11 +92,10 @@ type Exporter struct {
logger log.Logger
dsn string
scrapers []Scraper
metrics Metrics
}

// New returns a new MySQL exporter for the provided DSN.
func New(ctx context.Context, dsn string, metrics Metrics, scrapers []Scraper, logger log.Logger) *Exporter {
func New(ctx context.Context, dsn string, scrapers []Scraper, logger log.Logger) *Exporter {
// Setup extra params for the DSN, default to having a lock timeout.
dsnParams := []string{fmt.Sprintf(timeoutParam, *exporterLockTimeout)}

Expand All @@ -104,37 +115,28 @@ func New(ctx context.Context, dsn string, metrics Metrics, scrapers []Scraper, l
logger: logger,
dsn: dsn,
scrapers: scrapers,
metrics: metrics,
}
}

// Describe implements prometheus.Collector.
func (e *Exporter) Describe(ch chan<- *prometheus.Desc) {
ch <- e.metrics.TotalScrapes.Desc()
ch <- e.metrics.Error.Desc()
e.metrics.ScrapeErrors.Describe(ch)
ch <- e.metrics.MySQLUp.Desc()
ch <- mysqlUp
ch <- mysqlScrapeDurationSeconds
ch <- mysqlScrapeCollectorSuccess
}

// Collect implements prometheus.Collector.
func (e *Exporter) Collect(ch chan<- prometheus.Metric) {
e.scrape(e.ctx, ch)

ch <- e.metrics.TotalScrapes
ch <- e.metrics.Error
e.metrics.ScrapeErrors.Collect(ch)
ch <- e.metrics.MySQLUp
}

func (e *Exporter) scrape(ctx context.Context, ch chan<- prometheus.Metric) {
e.metrics.TotalScrapes.Inc()
var err error

scrapeTime := time.Now()
db, err := sql.Open("mysql", e.dsn)
if err != nil {
level.Error(e.logger).Log("msg", "Error opening connection to database", "err", err)
e.metrics.Error.Set(1)
ch <- prometheus.MustNewConstMetric(mysqlUp, prometheus.GaugeValue, 0.0)
return
}
defer db.Close()
Expand All @@ -147,15 +149,13 @@ func (e *Exporter) scrape(ctx context.Context, ch chan<- prometheus.Metric) {

if err := db.PingContext(ctx); err != nil {
level.Error(e.logger).Log("msg", "Error pinging mysqld", "err", err)
e.metrics.MySQLUp.Set(0)
e.metrics.Error.Set(1)
ch <- prometheus.MustNewConstMetric(mysqlUp, prometheus.GaugeValue, 0.0)
return
}

e.metrics.MySQLUp.Set(1)
e.metrics.Error.Set(0)
ch <- prometheus.MustNewConstMetric(mysqlUp, prometheus.GaugeValue, 1.0)

ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "connection")
ch <- prometheus.MustNewConstMetric(mysqlScrapeDurationSeconds, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), "connection")

version := getMySQLVersion(db, e.logger)
var wg sync.WaitGroup
Expand All @@ -170,12 +170,13 @@ func (e *Exporter) scrape(ctx context.Context, ch chan<- prometheus.Metric) {
defer wg.Done()
label := "collect." + scraper.Name()
scrapeTime := time.Now()
collectorSuccess := 1.0
if err := scraper.Scrape(ctx, db, ch, log.With(e.logger, "scraper", scraper.Name())); err != nil {
level.Error(e.logger).Log("msg", "Error from scraper", "scraper", scraper.Name(), "err", err)
e.metrics.ScrapeErrors.WithLabelValues(label).Inc()
e.metrics.Error.Set(1)
collectorSuccess = 0.0
}
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), label)
ch <- prometheus.MustNewConstMetric(mysqlScrapeCollectorSuccess, prometheus.GaugeValue, collectorSuccess, label)
ch <- prometheus.MustNewConstMetric(mysqlScrapeDurationSeconds, prometheus.GaugeValue, time.Since(scrapeTime).Seconds(), label)
}(scraper)
}
}
Expand All @@ -195,41 +196,3 @@ func getMySQLVersion(db *sql.DB, logger log.Logger) float64 {
}
return versionNum
}

// Metrics holds the exporter-level collectors whose values persist across
// HTTP scrape requests (this shared state is what PR #722 removes).
type Metrics struct {
	TotalScrapes prometheus.Counter     // count of scrapes performed by this exporter
	ScrapeErrors *prometheus.CounterVec // scrape errors, labeled per collector
	Error        prometheus.Gauge       // 1 if the last scrape errored, 0 otherwise
	MySQLUp      prometheus.Gauge       // 1 if the MySQL server is reachable, 0 otherwise
}

// NewMetrics creates new Metrics instance.
//
// Each metric is built individually and then assembled into the returned
// Metrics value; all metrics except MySQLUp live under the exporter
// subsystem (MySQLUp is exposed directly under the namespace as "up").
func NewMetrics() Metrics {
	subsystem := exporter

	totalScrapes := prometheus.NewCounter(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "scrapes_total",
		Help:      "Total number of times MySQL was scraped for metrics.",
	})

	scrapeErrors := prometheus.NewCounterVec(prometheus.CounterOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "scrape_errors_total",
		Help:      "Total number of times an error occurred scraping a MySQL.",
	}, []string{"collector"})

	lastScrapeError := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: namespace,
		Subsystem: subsystem,
		Name:      "last_scrape_error",
		Help:      "Whether the last scrape of metrics from MySQL resulted in an error (1 for error, 0 for success).",
	})

	// Deliberately has no Subsystem: exposed as <namespace>_up.
	mysqlUp := prometheus.NewGauge(prometheus.GaugeOpts{
		Namespace: namespace,
		Name:      "up",
		Help:      "Whether the MySQL server is up.",
	})

	return Metrics{
		TotalScrapes: totalScrapes,
		ScrapeErrors: scrapeErrors,
		Error:        lastScrapeError,
		MySQLUp:      mysqlUp,
	}
}
1 change: 0 additions & 1 deletion collector/exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ func TestExporter(t *testing.T) {
exporter := New(
context.Background(),
dsn,
NewMetrics(),
[]Scraper{
ScrapeGlobalStatus{},
},
Expand Down
8 changes: 4 additions & 4 deletions mysqld_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func init() {
prometheus.MustRegister(version.NewCollector("mysqld_exporter"))
}

func newHandler(metrics collector.Metrics, scrapers []collector.Scraper, logger log.Logger) http.HandlerFunc {
func newHandler(scrapers []collector.Scraper, logger log.Logger) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
var dsn string
var err error
Expand Down Expand Up @@ -176,7 +176,7 @@ func newHandler(metrics collector.Metrics, scrapers []collector.Scraper, logger

registry := prometheus.NewRegistry()

registry.MustRegister(collector.New(ctx, dsn, metrics, filteredScrapers, logger))
registry.MustRegister(collector.New(ctx, dsn, filteredScrapers, logger))

gatherers := prometheus.Gatherers{
prometheus.DefaultGatherer,
Expand Down Expand Up @@ -230,7 +230,7 @@ func main() {
enabledScrapers = append(enabledScrapers, scraper)
}
}
handlerFunc := newHandler(collector.NewMetrics(), enabledScrapers, logger)
handlerFunc := newHandler(enabledScrapers, logger)
http.Handle(*metricsPath, promhttp.InstrumentMetricHandler(prometheus.DefaultRegisterer, handlerFunc))
if *metricsPath != "/" && *metricsPath != "" {
landingConfig := web.LandingConfig{
Expand All @@ -251,7 +251,7 @@ func main() {
}
http.Handle("/", landingPage)
}
http.HandleFunc("/probe", handleProbe(collector.NewMetrics(), enabledScrapers, logger))
http.HandleFunc("/probe", handleProbe(enabledScrapers, logger))

srv := &http.Server{}
if err := web.ListenAndServe(srv, toolkitFlags, logger); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import (
"github.com/prometheus/mysqld_exporter/collector"
)

func handleProbe(metrics collector.Metrics, scrapers []collector.Scraper, logger log.Logger) http.HandlerFunc {
func handleProbe(scrapers []collector.Scraper, logger log.Logger) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
ctx := r.Context()
params := r.URL.Query()
Expand Down Expand Up @@ -57,7 +57,7 @@ func handleProbe(metrics collector.Metrics, scrapers []collector.Scraper, logger
filteredScrapers := filterScrapers(scrapers, collectParams)

registry := prometheus.NewRegistry()
registry.MustRegister(collector.New(ctx, dsn, metrics, filteredScrapers, logger))
registry.MustRegister(collector.New(ctx, dsn, filteredScrapers, logger))

h := promhttp.HandlerFor(registry, promhttp.HandlerOpts{})
h.ServeHTTP(w, r)
Expand Down

0 comments on commit 6ad9f25

Please sign in to comment.