From ec922e8603975b08e506ab87bc15e483a88dec86 Mon Sep 17 00:00:00 2001 From: Ben Kochie Date: Wed, 19 Feb 2020 16:11:29 +0100 Subject: [PATCH] Don't count empty collection as success (#1613) Many collectors depend on underlying features to be enabled. This causes confusion about what "success" means. This changes the behavior of the `node_scrape_collector_success` metric. * When a collector is unable to find data don't return success. * Catch the no data error and send to Debug log level to avoid log spam. * Update collectors to support this new functionality. * Fix copy-pasta mistake in infiband debug message. Closes: https://github.com/prometheus/node_exporter/issues/1323 Signed-off-by: Ben Kochie --- CHANGELOG.md | 2 ++ collector/bonding_linux.go | 2 +- collector/collector.go | 14 +++++++++++++- collector/drbd_linux.go | 2 +- collector/hwmon_linux.go | 2 +- collector/infiniband_linux.go | 4 ++-- collector/ipvs_linux.go | 2 +- collector/mdadm_linux.go | 2 +- collector/nfs_linux.go | 2 +- collector/nfsd_linux.go | 2 +- collector/wifi_linux.go | 4 ++-- 11 files changed, 26 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0c810a987..90a8eb449e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,12 +10,14 @@ - `node_md_is_active` is replaced by `node_md_state` with a state set of "active", "inactive", "recovering", "resync". * Additional label `mountaddr` added to NFS device metrics to distinguish mounts from the same URL, but different IP addresses. #1417 * Metrics node_cpu_scaling_frequency_min_hrts and node_cpu_scaling_frequency_max_hrts of the cpufreq collector were renamed to node_cpu_scaling_frequency_min_hertz and node_cpu_scaling_frequency_max_hertz. #1510 +* Collectors that are enabled, but are unable to find data to collect, now return 0 for `node_scrape_collector_success`. ### Changes * [CHANGE] Add `--collector.netdev.device-whitelist`. #1279 * [CHANGE] Refactor mdadm collector #1403 * [CHANGE] Add `mountaddr` label to NFS metrics. #1417 +* [CHANGE] Don't count empty collectors as success. #... * [FEATURE] Add new schedstat collector #1389 * [FEATURE] Add uname support for Darwin and OpenBSD #1433 * [FEATURE] Add new metric node_cpu_info #1489 diff --git a/collector/bonding_linux.go b/collector/bonding_linux.go index 771786bf82..78e94b2f8d 100644 --- a/collector/bonding_linux.go +++ b/collector/bonding_linux.go @@ -61,7 +61,7 @@ func (c *bondingCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "Not collecting bonding, file does not exist", "file", statusfile) - return nil + return ErrNoData } return err } diff --git a/collector/collector.go b/collector/collector.go index af8712c00e..0fec09c9d3 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -15,6 +15,7 @@ package collector import ( + "errors" "fmt" "sync" "time" @@ -131,7 +132,11 @@ func execute(name string, c Collector, ch chan<- prometheus.Metric, logger log.L var success float64 if err != nil { - level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + if IsNoDataError(err) { + level.Debug(logger).Log("msg", "collector returned no data", "name", name, "duration_seconds", duration.Seconds(), "err", err) + } else { + level.Error(logger).Log("msg", "collector failed", "name", name, "duration_seconds", duration.Seconds(), "err", err) + } success = 0 } else { level.Debug(logger).Log("msg", "collector succeeded", "name", name, "duration_seconds", duration.Seconds()) @@ -155,3 +160,10 @@ type typedDesc struct { func (d *typedDesc) mustNewConstMetric(value float64, labels ...string) prometheus.Metric { return prometheus.MustNewConstMetric(d.desc, d.valueType, value, labels...) } + +// ErrNoData indicates the collector found no data to collect, but had no other error. +var ErrNoData = errors.New("collector returned no data") + +func IsNoDataError(err error) bool { + return err == ErrNoData +} diff --git a/collector/drbd_linux.go b/collector/drbd_linux.go index 5a340b9f02..6815c5f0e0 100644 --- a/collector/drbd_linux.go +++ b/collector/drbd_linux.go @@ -188,7 +188,7 @@ func (c *drbdCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "stats file does not exist, skipping", "file", statsFile, "err", err) - return nil + return ErrNoData } return err diff --git a/collector/hwmon_linux.go b/collector/hwmon_linux.go index 31dbd22177..564994240a 100644 --- a/collector/hwmon_linux.go +++ b/collector/hwmon_linux.go @@ -426,7 +426,7 @@ func (c *hwMonCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "hwmon collector metrics are not available for this system") - return nil + return ErrNoData } return err diff --git a/collector/infiniband_linux.go b/collector/infiniband_linux.go index 4fa68ba6d1..1f453b8202 100644 --- a/collector/infiniband_linux.go +++ b/collector/infiniband_linux.go @@ -109,8 +109,8 @@ func (c *infinibandCollector) Update(ch chan<- prometheus.Metric) error { devices, err := c.fs.InfiniBandClass() if err != nil { if os.IsNotExist(err) { - level.Debug(c.logger).Log("msg", "IPv4 sockstat statistics not found, skipping") - return nil + level.Debug(c.logger).Log("msg", "infiniband statistics not found, skipping") + return ErrNoData } return fmt.Errorf("error obtaining InfiniBand class info: %s", err) } diff --git a/collector/ipvs_linux.go b/collector/ipvs_linux.go index 89469e52ea..11adf0fa7a 100644 --- a/collector/ipvs_linux.go +++ b/collector/ipvs_linux.go @@ -115,7 +115,7 @@ func (c *ipvsCollector) Update(ch chan<- prometheus.Metric) error { // Cannot access ipvs metrics, report no error. if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "ipvs collector metrics are not available for this system") - return nil + return ErrNoData } return fmt.Errorf("could not get IPVS stats: %s", err) } diff --git a/collector/mdadm_linux.go b/collector/mdadm_linux.go index faf1cee242..05f83eec9b 100644 --- a/collector/mdadm_linux.go +++ b/collector/mdadm_linux.go @@ -105,7 +105,7 @@ func (c *mdadmCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "Not collecting mdstat, file does not exist", "file", *procPath) - return nil + return ErrNoData } return fmt.Errorf("error parsing mdstatus: %s", err) diff --git a/collector/nfs_linux.go b/collector/nfs_linux.go index 7ed493083b..55f9e191b4 100644 --- a/collector/nfs_linux.go +++ b/collector/nfs_linux.go @@ -97,7 +97,7 @@ func (c *nfsCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "Not collecting NFS metrics", "err", err) - return nil + return ErrNoData } return fmt.Errorf("failed to retrieve nfs stats: %w", err) } diff --git a/collector/nfsd_linux.go b/collector/nfsd_linux.go index 234e1da202..b6f2f8eed7 100644 --- a/collector/nfsd_linux.go +++ b/collector/nfsd_linux.go @@ -63,7 +63,7 @@ func (c *nfsdCollector) Update(ch chan<- prometheus.Metric) error { if err != nil { if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "Not collecting NFSd metrics", "err", err) - return nil + return ErrNoData } return fmt.Errorf("failed to retrieve nfsd stats: %w", err) } diff --git a/collector/wifi_linux.go b/collector/wifi_linux.go index 5a5a86ce39..118e714331 100644 --- a/collector/wifi_linux.go +++ b/collector/wifi_linux.go @@ -167,11 +167,11 @@ func (c *wifiCollector) Update(ch chan<- prometheus.Metric) error { // Cannot access wifi metrics, report no error. if os.IsNotExist(err) { level.Debug(c.logger).Log("msg", "wifi collector metrics are not available for this system") - return nil + return ErrNoData } if os.IsPermission(err) { level.Debug(c.logger).Log("msg", "wifi collector got permission denied when accessing metrics") - return nil + return ErrNoData } return fmt.Errorf("failed to access wifi data: %w", err)