diff --git a/metrics/grafana/pd.json b/metrics/grafana/pd.json index 5bab58b707d..51408770f32 100644 --- a/metrics/grafana/pd.json +++ b/metrics/grafana/pd.json @@ -796,7 +796,7 @@ "intervalFactor": 2, "legendFormat": "{{type}}", "refId": "B" - }, + } ], "thresholds": [ { diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index 0cc4d2e4264..8a64c28586c 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -52,8 +52,8 @@ const ( maxScheduleRetries = 10 maxLoadConfigRetries = 10 - patrolScanRegionLimit = 128 // It takes about 14 minutes to iterate 1 million regions. - statsScanRegionLimit = 1000 + patrolScanRegionLimit = 128 // It takes about 14 minutes to iterate 1 million regions. + statsScanRegionLimit = 1000 // It takes about 3.5 minutes to iterate 2 million regions. defaultScrapInterval = 15 * time.Second // PluginLoad means action for load plugin PluginLoad = "PluginLoad" @@ -231,6 +231,7 @@ func (c *coordinator) collectRegionStats() { case <-timer.C: } } + collectRegionStatsGauge.Set(time.Since(start).Seconds()) start = time.Now() } } diff --git a/server/cluster/metrics.go b/server/cluster/metrics.go index 8c0bceb94ca..19f09ce53f0 100644 --- a/server/cluster/metrics.go +++ b/server/cluster/metrics.go @@ -73,6 +73,14 @@ var ( Help: "Time spent of patrol checks region.", }) + collectRegionStatsGauge = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: "pd", + Subsystem: "statistics", + Name: "collect_region_stats_time", + Help: "Time spent of collecting region stats.", + }) + updateStoreStatsGauge = prometheus.NewGauge( prometheus.GaugeOpts{ Namespace: "pd", @@ -152,4 +160,5 @@ func init() { prometheus.MustRegister(storesETAGauge) prometheus.MustRegister(storeSyncConfigEvent) prometheus.MustRegister(updateStoreStatsGauge) + prometheus.MustRegister(collectRegionStatsGauge) }