Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cpu: Metric 'package_throttles_total' is per package. #657

Merged
merged 4 commits into from
Sep 7, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 49 additions & 13 deletions collector/cpu_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,11 @@ package collector

import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"regexp"
"strings"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/common/log"
Expand All @@ -29,6 +32,10 @@ const (
cpuCollectorNamespace = "cpu"
)

var (
// digitRegexp matches a run of one or more ASCII digits. It is compiled once
// at package scope and used to pull the node number out of a sysfs node
// device path for the package throttle metric's label.
// NOTE(review): the caller takes the first match in the whole path, so digits
// appearing earlier in the path (e.g. in a custom sysfs prefix) would
// presumably be picked up instead of the node number — confirm.
digitRegexp = regexp.MustCompile("[0-9]+")
)

type cpuCollector struct {
cpu *prometheus.Desc
cpuFreq *prometheus.Desc
Expand Down Expand Up @@ -65,6 +72,7 @@ func NewCPUCollector() (Collector, error) {
"Maximum cpu thread frequency in hertz.",
[]string{"cpu"}, nil,
),
// FIXME: This should be a per core metric, not per cpu!
cpuCoreThrottle: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "core_throttles_total"),
"Number of times this cpu core has been throttled.",
Expand All @@ -73,7 +81,7 @@ func NewCPUCollector() (Collector, error) {
cpuPackageThrottle: prometheus.NewDesc(
prometheus.BuildFQName(Namespace, cpuCollectorNamespace, "package_throttles_total"),
"Number of times this cpu package has been throttled.",
[]string{"cpu"}, nil,
[]string{"node"}, nil,
),
}, nil
}
Expand All @@ -98,6 +106,7 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {

var value uint64

// cpu loop
for _, cpu := range cpus {
_, cpuname := filepath.Split(cpu)

Expand All @@ -106,35 +115,62 @@ func (c *cpuCollector) updateCPUfreq(ch chan<- prometheus.Metric) error {
} else {
// sysfs cpufreq values are kHz, thus multiply by 1000 to export base units (hz).
// See https://www.kernel.org/doc/Documentation/cpu-freq/user-guide.txt
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_cur_freq")); err != nil {
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_cur_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreq, prometheus.GaugeValue, float64(value)*1000.0, cpuname)

if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_min_freq")); err != nil {
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_min_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreqMin, prometheus.GaugeValue, float64(value)*1000.0, cpuname)

if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq/scaling_max_freq")); err != nil {
if value, err = readUintFromFile(filepath.Join(cpu, "cpufreq", "scaling_max_freq")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuFreqMax, prometheus.GaugeValue, float64(value)*1000.0, cpuname)
}

if _, err := os.Stat(filepath.Join(cpu, "thermal_throttle")); os.IsNotExist(err) {
log.Debugf("CPU %q is missing thermal_throttle", cpu)
} else {
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/core_throttle_count")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
continue
}
if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle", "core_throttle_count")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuCoreThrottle, prometheus.CounterValue, float64(value), cpuname)
}

if value, err = readUintFromFile(filepath.Join(cpu, "thermal_throttle/package_throttle_count")); err != nil {
return err
}
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), cpuname)
pkgs, err := filepath.Glob(sysFilePath("bus/node/devices/node[0-9]*"))
if err != nil {
return err
}

// package/node loop
for _, pkg := range pkgs {
if _, err := os.Stat(filepath.Join(pkg, "cpulist")); os.IsNotExist(err) {
log.Debugf("package %q is missing cpulist", pkg)
continue
}
cpulist, err := ioutil.ReadFile(filepath.Join(pkg, "cpulist"))
if err != nil {
log.Debugf("could not read cpulist of package %q", pkg)
return err
}
// cpulist example of one package/node with HT: "0-11,24-35"
line := strings.Split(string(cpulist), "\n")[0]
firstCPU := strings.FieldsFunc(line, func(r rune) bool {
return r == '-' || r == ','
})[0]
if _, err := os.Stat(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); os.IsNotExist(err) {
log.Debugf("Package %q CPU %q is missing package_throttle", pkg, firstCPU)
continue
}
if value, err = readUintFromFile(filepath.Join(pkg, "cpu"+firstCPU, "thermal_throttle", "package_throttle_count")); err != nil {
return err
}
pkgno := digitRegexp.FindAllString(pkg, 1)[0]
ch <- prometheus.MustNewConstMetric(c.cpuPackageThrottle, prometheus.CounterValue, float64(value), pkgno)
}

return nil
Expand Down
4 changes: 1 addition & 3 deletions collector/fixtures/e2e-output.txt
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,7 @@ node_cpu_frequency_min_hertz{cpu="cpu1"} 8e+08
node_cpu_frequency_min_hertz{cpu="cpu3"} 1e+06
# HELP node_cpu_package_throttles_total Number of times this cpu package has been throttled.
# TYPE node_cpu_package_throttles_total counter
node_cpu_package_throttles_total{cpu="cpu0"} 30
node_cpu_package_throttles_total{cpu="cpu1"} 30
node_cpu_package_throttles_total{cpu="cpu2"} 6
node_cpu_package_throttles_total{node="0"} 30
# HELP node_disk_bytes_read The total number of bytes read successfully.
# TYPE node_disk_bytes_read counter
node_disk_bytes_read{device="dm-0"} 5.13708655616e+11
Expand Down
36 changes: 36 additions & 0 deletions collector/fixtures/sys.ttar
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,42 @@ Lines: 1
1000
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node0
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node0/cpu0
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node0/cpu0/thermal_throttle
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node0/cpu0/thermal_throttle/package_throttle_count
Lines: 1
30
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node0/cpu1
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/bus/node/devices/node0/cpu1/thermal_throttle
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node0/cpu1/thermal_throttle/package_throttle_count
Lines: 1
30
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Path: sys/bus/node/devices/node0/cpulist
Lines: 1
0-3
Mode: 644
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Directory: sys/class
Mode: 755
# ttar - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
Expand Down