
Commit 5673ffc

Merge pull request #435 from sburke-at-ziprecruiter/sburke.fb-225844-replica_host_status
Add collector for AWS Aurora information_schema.replica_host_status
2 parents 72c32a4 + c03d5c6 commit 5673ffc

4 files changed: +221 −0 lines

README.md

+1
```diff
@@ -65,6 +65,7 @@ collect.info_schema.innodb_cmpmem | 5.5 | C
 collect.info_schema.processlist | 5.1 | Collect thread state counts from information_schema.processlist.
 collect.info_schema.processlist.min_time | 5.1 | Minimum time a thread must be in each state to be counted. (default: 0)
 collect.info_schema.query_response_time | 5.5 | Collect query response time distribution if query_response_time_stats is ON.
+collect.info_schema.replica_host | 5.6 | Collect metrics from information_schema.replica_host_status.
 collect.info_schema.tables | 5.1 | Collect metrics from information_schema.tables.
 collect.info_schema.tables.databases | 5.1 | The list of databases to collect table stats for, or '`*`' for all.
 collect.info_schema.tablestats | 5.1 | If running with userstat=1, set to true to collect table statistics.
```

collector/info_schema_replica_host.go

+147
@@ -0,0 +1,147 @@

```go
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Scrape `information_schema.replica_host_status`.

package collector

import (
	"context"
	"database/sql"

	"github.com/go-kit/kit/log"
	"github.com/go-kit/kit/log/level"
	MySQL "github.com/go-sql-driver/mysql"
	"github.com/prometheus/client_golang/prometheus"
)

const replicaHostQuery = `
	SELECT SERVER_ID
	     , if(SESSION_ID='MASTER_SESSION_ID','writer','reader') AS ROLE
	     , CPU
	     , MASTER_SLAVE_LATENCY_IN_MICROSECONDS
	     , REPLICA_LAG_IN_MILLISECONDS
	     , LOG_STREAM_SPEED_IN_KiB_PER_SECOND
	     , CURRENT_REPLAY_LATENCY_IN_MICROSECONDS
	  FROM information_schema.replica_host_status
	`

// Metric descriptors.
var (
	infoSchemaReplicaHostCpuDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, informationSchema, "replica_host_cpu_percent"),
		"The CPU usage as a percentage.",
		[]string{"server_id", "role"}, nil,
	)
	infoSchemaReplicaHostSlaveLatencyDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, informationSchema, "replica_host_slave_latency_seconds"),
		"The master-slave latency in seconds.",
		[]string{"server_id", "role"}, nil,
	)
	infoSchemaReplicaHostLagDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, informationSchema, "replica_host_lag_seconds"),
		"The replica lag in seconds.",
		[]string{"server_id", "role"}, nil,
	)
	infoSchemaReplicaHostLogStreamSpeedDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, informationSchema, "replica_host_log_stream_speed"),
		"The log stream speed in kilobytes per second.",
		[]string{"server_id", "role"}, nil,
	)
	infoSchemaReplicaHostReplayLatencyDesc = prometheus.NewDesc(
		prometheus.BuildFQName(namespace, informationSchema, "replica_host_replay_latency_seconds"),
		"The current replay latency in seconds.",
		[]string{"server_id", "role"}, nil,
	)
)
```
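A note on the resulting series names: `prometheus.BuildFQName` joins namespace, subsystem, and metric name with underscores. Assuming the package-level constants carry their usual values in this exporter (`namespace = "mysql"`, `informationSchema = "info_schema"`; neither is part of this diff), the descriptors above yield names such as `mysql_info_schema_replica_host_cpu_percent`. A minimal sketch under that assumption:

```go
package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Assumed values; the real constants live elsewhere in the
	// collector package and are not shown in this diff.
	const namespace = "mysql"
	const informationSchema = "info_schema"

	fmt.Println(prometheus.BuildFQName(namespace, informationSchema, "replica_host_cpu_percent"))
	// Output: mysql_info_schema_replica_host_cpu_percent
}
```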
The remainder of the file defines the scraper and its `Scrape` loop:

```go
// ScrapeReplicaHost collects from `information_schema.replica_host_status`.
type ScrapeReplicaHost struct{}

// Name of the Scraper. Should be unique.
func (ScrapeReplicaHost) Name() string {
	return "info_schema.replica_host"
}

// Help describes the role of the Scraper.
func (ScrapeReplicaHost) Help() string {
	return "Collect metrics from information_schema.replica_host_status"
}

// Version of MySQL from which scraper is available.
func (ScrapeReplicaHost) Version() float64 {
	return 5.6
}

// Scrape collects data from database connection and sends it over channel as prometheus metric.
func (ScrapeReplicaHost) Scrape(ctx context.Context, db *sql.DB, ch chan<- prometheus.Metric, logger log.Logger) error {
	replicaHostRows, err := db.QueryContext(ctx, replicaHostQuery)
	if err != nil {
		if mysqlErr, ok := err.(*MySQL.MySQLError); ok { // Now the error number is accessible directly
			// Check for error 1109: Unknown table. The table only exists on
			// AWS Aurora, so a missing table just means there is nothing to scrape.
			if mysqlErr.Number == 1109 {
				level.Debug(logger).Log("msg", "information_schema.replica_host_status is not available.")
				return nil
			}
		}
		return err
	}
	defer replicaHostRows.Close()

	var (
		serverId       string
		role           string
		cpu            float64
		slaveLatency   uint64
		replicaLag     float64
		logStreamSpeed float64
		replayLatency  uint64
	)
	for replicaHostRows.Next() {
		if err := replicaHostRows.Scan(
			&serverId,
			&role,
			&cpu,
			&slaveLatency,
			&replicaLag,
			&logStreamSpeed,
			&replayLatency,
		); err != nil {
			return err
		}
		ch <- prometheus.MustNewConstMetric(
			infoSchemaReplicaHostCpuDesc, prometheus.GaugeValue, cpu,
			serverId, role,
		)
		// Microseconds to seconds.
		ch <- prometheus.MustNewConstMetric(
			infoSchemaReplicaHostSlaveLatencyDesc, prometheus.GaugeValue, float64(slaveLatency)*0.000001,
			serverId, role,
		)
		// Milliseconds to seconds.
		ch <- prometheus.MustNewConstMetric(
			infoSchemaReplicaHostLagDesc, prometheus.GaugeValue, replicaLag*0.001,
			serverId, role,
		)
		ch <- prometheus.MustNewConstMetric(
			infoSchemaReplicaHostLogStreamSpeedDesc, prometheus.GaugeValue, logStreamSpeed,
			serverId, role,
		)
		// Microseconds to seconds.
		ch <- prometheus.MustNewConstMetric(
			infoSchemaReplicaHostReplayLatencyDesc, prometheus.GaugeValue, float64(replayLatency)*0.000001,
			serverId, role,
		)
	}
	return nil
}

// check interface
var _ Scraper = ScrapeReplicaHost{}
```
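The trailing `var _ Scraper = ScrapeReplicaHost{}` line is a compile-time assertion that the type satisfies the package's `Scraper` interface. That interface is not part of this diff; judging from the methods implemented above, it is presumably shaped like this sketch (the authoritative definition lives elsewhere in the collector package):

```go
package collector

import (
	"context"
	"database/sql"

	"github.com/go-kit/kit/log"
	"github.com/prometheus/client_golang/prometheus"
)

// Scraper as inferred from the methods ScrapeReplicaHost implements.
type Scraper interface {
	Name() string     // unique scraper name, e.g. "info_schema.replica_host"
	Help() string     // human-readable description of the scraper
	Version() float64 // minimum MySQL version, e.g. 5.6
	// Scrape runs the query and emits metrics on ch.
	Scrape(ctx context.Context, db *sql.DB, ch chan<- prometheus.Metric, logger log.Logger) error
}
```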
collector/info_schema_replica_host_test.go

+72
@@ -0,0 +1,72 @@

```go
// Copyright 2020 The Prometheus Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collector

import (
	"context"
	"testing"

	"github.com/DATA-DOG/go-sqlmock"
	"github.com/go-kit/kit/log"
	"github.com/prometheus/client_golang/prometheus"
	dto "github.com/prometheus/client_model/go"
	"github.com/smartystreets/goconvey/convey"
)

func TestScrapeReplicaHost(t *testing.T) {
	db, mock, err := sqlmock.New()
	if err != nil {
		t.Fatalf("error opening a stub database connection: %s", err)
	}
	defer db.Close()

	columns := []string{"SERVER_ID", "ROLE", "CPU", "MASTER_SLAVE_LATENCY_IN_MICROSECONDS", "REPLICA_LAG_IN_MILLISECONDS", "LOG_STREAM_SPEED_IN_KiB_PER_SECOND", "CURRENT_REPLAY_LATENCY_IN_MICROSECONDS"}
	rows := sqlmock.NewRows(columns).
		AddRow("dbtools-cluster-us-west-2c", "reader", 1.2531328201293945, 250000, 20.069000244140625, 2.0368164549078225, 500000).
		AddRow("dbtools-cluster-writer", "writer", 1.9607843160629272, 250000, 0, 2.0368164549078225, 0)
	mock.ExpectQuery(sanitizeQuery(replicaHostQuery)).WillReturnRows(rows)

	ch := make(chan prometheus.Metric)
	go func() {
		if err = (ScrapeReplicaHost{}).Scrape(context.Background(), db, ch, log.NewNopLogger()); err != nil {
			t.Errorf("error calling function on test: %s", err)
		}
		close(ch)
	}()

	expected := []MetricResult{
		{labels: labelMap{"server_id": "dbtools-cluster-us-west-2c", "role": "reader"}, value: 1.2531328201293945, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-us-west-2c", "role": "reader"}, value: 0.25, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-us-west-2c", "role": "reader"}, value: 0.020069000244140625, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-us-west-2c", "role": "reader"}, value: 2.0368164549078225, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-us-west-2c", "role": "reader"}, value: 0.5, metricType: dto.MetricType_GAUGE},

		{labels: labelMap{"server_id": "dbtools-cluster-writer", "role": "writer"}, value: 1.9607843160629272, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-writer", "role": "writer"}, value: 0.25, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-writer", "role": "writer"}, value: 0.0, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-writer", "role": "writer"}, value: 2.0368164549078225, metricType: dto.MetricType_GAUGE},
		{labels: labelMap{"server_id": "dbtools-cluster-writer", "role": "writer"}, value: 0.0, metricType: dto.MetricType_GAUGE},
	}
	convey.Convey("Metrics comparison", t, func() {
		for _, expect := range expected {
			got := readMetric(<-ch)
			convey.So(expect, convey.ShouldResemble, got)
		}
	})

	// Ensure all SQL queries were executed
	if err := mock.ExpectationsWereMet(); err != nil {
		t.Errorf("there were unfulfilled expectations: %s", err)
	}
}
```
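The expected values fall out of the unit conversions in `Scrape`: the mocked `MASTER_SLAVE_LATENCY_IN_MICROSECONDS` of 250000 becomes 0.25 seconds, the `REPLICA_LAG_IN_MILLISECONDS` of 20.069000244140625 becomes 0.020069000244140625 seconds, and the replay latency of 500000 microseconds becomes 0.5 seconds. A standalone check of the arithmetic:

```go
package main

import "fmt"

func main() {
	// Values from the mocked reader row, converted exactly as Scrape does.
	fmt.Println(float64(250000) * 0.000001) // 0.25 (microseconds -> seconds)
	fmt.Println(20.069000244140625 * 0.001) // 0.020069000244140625 (milliseconds -> seconds)
	fmt.Println(float64(500000) * 0.000001) // 0.5 (microseconds -> seconds)
}
```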

mysqld_exporter.go

+1
```diff
@@ -96,6 +96,7 @@ var scrapers = map[collector.Scraper]bool{
 	collector.ScrapeEngineInnodbStatus{}: false,
 	collector.ScrapeHeartbeat{}:          false,
 	collector.ScrapeSlaveHosts{}:         false,
+	collector.ScrapeReplicaHost{}:        false,
 }

 func parseMycnf(config interface{}) (string, error) {
```
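Registering `collector.ScrapeReplicaHost{}` with `false` leaves the collector disabled by default; users opt in with `--collect.info_schema.replica_host`, matching the README entry above. As a rough sketch of why the map value acts as the flag default: the exporter's main derives one boolean flag per scraper from this map, along these lines (the loop and variable names here are illustrative, not the exact upstream code):

```go
// Illustrative sketch, inside main(): one kingpin flag per scraper,
// named "collect." + scraper.Name(), defaulting to the map value.
scraperFlags := map[collector.Scraper]*bool{}
for scraper, enabledByDefault := range scrapers {
	defaultOn := "false"
	if enabledByDefault {
		defaultOn = "true"
	}
	f := kingpin.Flag(
		"collect."+scraper.Name(),
		scraper.Help(),
	).Default(defaultOn).Bool()
	scraperFlags[scraper] = f
}
```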
