Aggregate perf metrics #2611

Merged: 1 commit, Aug 17, 2020
6 changes: 6 additions & 0 deletions docs/runtime_options.md
@@ -140,6 +140,12 @@ cAdvisor stores the latest historical data in memory. How long of a history it s
--perf_events_config="" Path to a JSON file containing configuration of perf events to measure. Empty value disables perf events measuring.
```

Core perf events can be exposed on the Prometheus endpoint either per CPU or aggregated by event. This is controlled through the `--disable_metrics` parameter with the `percpu` option, e.g.:
- `--disable_metrics="percpu"` - core perf events are aggregated by event
- `--disable_metrics=""` - core perf events are exposed per CPU

The aggregated form of core perf events significantly decreases the volume of data. In the aggregated form, the scaling ratio (`container_perf_events_scaling_ratio`) reports the lowest scaling ratio observed for a given event across all CPUs, i.e. the worst precision.
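For illustration, the same event might be exposed as follows in each mode (sample values borrowed from this PR's unit tests; note that the aggregated series keeps the `cpu` label but leaves it empty):

```
# --disable_metrics="" - core perf events exposed per CPU
container_perf_events_total{cpu="0",event="instructions"} 123
container_perf_events_total{cpu="1",event="instructions"} 456

# --disable_metrics="percpu" - one aggregated series per event
container_perf_events_total{cpu="",event="instructions"} 579
container_perf_events_scaling_ratio{cpu="",event="instructions"} 0.5
```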

### Perf subsystem introduction

One of the goals of the kernel perf subsystem is to instrument CPU performance counters that allow profiling applications.
142 changes: 108 additions & 34 deletions metrics/prometheus.go
@@ -1577,41 +1577,48 @@ func NewPrometheusCollector(i infoProvider, f ContainerLabelsFunc, includedMetri
        }...)
    }
    if includedMetrics.Has(container.PerfMetrics) {
        if includedMetrics.Has(container.PerCpuUsageMetrics) {
            c.containerMetrics = append(c.containerMetrics, []containerMetric{
                {
                    name:        "container_perf_events_total",
                    help:        "Perf event metric.",
                    valueType:   prometheus.CounterValue,
                    extraLabels: []string{"cpu", "event"},
                    getValues: func(s *info.ContainerStats) metricValues {
                        return getPerCPUCorePerfEvents(s)
                    },
                },
                {
                    name:        "container_perf_events_scaling_ratio",
                    help:        "Perf event metric scaling ratio.",
                    valueType:   prometheus.GaugeValue,
                    extraLabels: []string{"cpu", "event"},
                    getValues: func(s *info.ContainerStats) metricValues {
                        return getPerCPUCoreScalingRatio(s)
                    },
                }}...)
        } else {
            c.containerMetrics = append(c.containerMetrics, []containerMetric{
                {
                    name:        "container_perf_events_total",
                    help:        "Perf event metric.",
                    valueType:   prometheus.CounterValue,
                    extraLabels: []string{"cpu", "event"},
                    getValues: func(s *info.ContainerStats) metricValues {
                        return getAggregatedCorePerfEvents(s)
                    },
                },
                {
                    name:        "container_perf_events_scaling_ratio",
                    help:        "Perf event metric scaling ratio.",
                    valueType:   prometheus.GaugeValue,
                    extraLabels: []string{"cpu", "event"},
                    getValues: func(s *info.ContainerStats) metricValues {
                        return getMinCoreScalingRatio(s)
                    },
                }}...)
        }
        c.containerMetrics = append(c.containerMetrics, []containerMetric{
            {
                name:        "container_perf_events_total",
                help:        "Perf event metric.",
                valueType:   prometheus.CounterValue,
                extraLabels: []string{"cpu", "event"},
                getValues: func(s *info.ContainerStats) metricValues {
                    values := make(metricValues, 0, len(s.PerfStats))
                    for _, metric := range s.PerfStats {
                        values = append(values, metricValue{
                            value:     float64(metric.Value),
                            labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
                            timestamp: s.Timestamp,
                        })
                    }
                    return values
                },
            },
            {
                name:        "container_perf_events_scaling_ratio",
                help:        "Perf event metric scaling ratio.",
                valueType:   prometheus.GaugeValue,
                extraLabels: []string{"cpu", "event"},
                getValues: func(s *info.ContainerStats) metricValues {
                    values := make(metricValues, 0, len(s.PerfStats))
                    for _, metric := range s.PerfStats {
                        values = append(values, metricValue{
                            value:     metric.ScalingRatio,
                            labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
                            timestamp: s.Timestamp,
                        })
                    }
                    return values
                },
            },
            {
                name: "container_perf_uncore_events_total",
                help: "Perf uncore event metric.",
@@ -1940,3 +1947,70 @@ func getNumaStatsPerNode(nodeStats map[uint8]uint64, labels []string, timestamp
    }
    return mValues
}

func getPerCPUCorePerfEvents(s *info.ContainerStats) metricValues {
    values := make(metricValues, 0, len(s.PerfStats))
    for _, metric := range s.PerfStats {
        values = append(values, metricValue{
            value:     float64(metric.Value),
            labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
            timestamp: s.Timestamp,
        })
    }
    return values
}

func getPerCPUCoreScalingRatio(s *info.ContainerStats) metricValues {
    values := make(metricValues, 0, len(s.PerfStats))
    for _, metric := range s.PerfStats {
        values = append(values, metricValue{
            value:     metric.ScalingRatio,
            labels:    []string{strconv.Itoa(metric.Cpu), metric.Name},
            timestamp: s.Timestamp,
        })
    }
    return values
}

func getAggregatedCorePerfEvents(s *info.ContainerStats) metricValues {
    values := make(metricValues, 0)

    perfEventStatAgg := make(map[string]uint64)
    // aggregate by event
    for _, perfStat := range s.PerfStats {
        perfEventStatAgg[perfStat.Name] += perfStat.Value
    }
    // create aggregated metrics; the empty string keeps the "cpu" label
    // in place but marks the series as aggregated across all CPUs
    for perfEvent, perfValue := range perfEventStatAgg {
        values = append(values, metricValue{
            value:     float64(perfValue),
            labels:    []string{"", perfEvent},
            timestamp: s.Timestamp,
        })
    }
    return values
}

func getMinCoreScalingRatio(s *info.ContainerStats) metricValues {
    values := make(metricValues, 0)
    perfEventStatMin := make(map[string]float64)
    // search for the minimal value of the scaling ratio for each event
    for _, perfStat := range s.PerfStats {
        if _, ok := perfEventStatMin[perfStat.Name]; !ok {
            // found a new event
            perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio
        } else if perfStat.ScalingRatio < perfEventStatMin[perfStat.Name] {
            // found a lower value of the scaling ratio, so replace the minimal value
            perfEventStatMin[perfStat.Name] = perfStat.ScalingRatio
        }
    }

    for perfEvent, perfScalingRatio := range perfEventStatMin {
        values = append(values, metricValue{
            value:     perfScalingRatio,
            labels:    []string{"", perfEvent},
            timestamp: s.Timestamp,
        })
    }
    return values
}
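As a usage sketch (not part of this PR's diff), the branch in `NewPrometheusCollector` is driven purely by the metric set; the construction below mirrors the test code further down, and the mapping from `--disable_metrics="percpu"` to the absence of `container.PerCpuUsageMetrics` is an assumption based on the documentation change above:

```go
// Minimal sketch: with only PerfMetrics enabled, the collector registers
// the aggregated series (getAggregatedCorePerfEvents/getMinCoreScalingRatio).
metrics := container.MetricSet{
    container.PerfMetrics: struct{}{},
}

// Also enabling PerCpuUsageMetrics (i.e. "percpu" not listed in
// --disable_metrics) selects the per-CPU series instead.
metrics[container.PerCpuUsageMetrics] = struct{}{}
```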
197 changes: 197 additions & 0 deletions metrics/prometheus_test.go
@@ -44,6 +44,21 @@ func TestPrometheusCollector(t *testing.T) {
    testPrometheusCollector(t, reg, "testdata/prometheus_metrics")
}

func TestPrometheusCollectorWithPerfAggregated(t *testing.T) {
Review comment from @katarzyna-z (Collaborator, Author), Aug 17, 2020:
@dashpole Unit tests are available in this file. Do you think that I should add something?

    metrics := container.MetricSet{
        container.PerfMetrics: struct{}{},
    }
    c := NewPrometheusCollector(testSubcontainersInfoProvider{}, func(container *info.ContainerInfo) map[string]string {
        s := DefaultContainerLabels(container)
        s["zone.name"] = "hello"
        return s
    }, metrics, now, v2.RequestOptions{})
    reg := prometheus.NewRegistry()
    reg.MustRegister(c)

    testPrometheusCollector(t, reg, "testdata/prometheus_metrics_perf_aggregated")
}

func testPrometheusCollector(t *testing.T, gatherer prometheus.Gatherer, metricsFile string) {
    wantMetrics, err := os.Open(metricsFile)
    if err != nil {
@@ -122,3 +137,185 @@ func (m *mockInfoProvider) GetMachineInfo() (*info.MachineInfo, error) {
func mockLabelFunc(*info.ContainerInfo) map[string]string {
    return map[string]string{}
}

func TestGetPerCpuCorePerfEvents(t *testing.T) {
    containerStats := &info.ContainerStats{
        Timestamp: time.Unix(1395066367, 0),
        PerfStats: []info.PerfStat{
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 1.0,
                    Value:        123,
                    Name:         "instructions",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.5,
                    Value:        456,
                    Name:         "instructions",
                },
                Cpu: 1,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.7,
                    Value:        321,
                    Name:         "instructions_retired",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.3,
                    Value:        789,
                    Name:         "instructions_retired",
                },
                Cpu: 1,
            },
        },
    }
    metricVals := getPerCPUCorePerfEvents(containerStats)
    assert.Equal(t, 4, len(metricVals))
    values := []float64{}
    for _, metric := range metricVals {
        values = append(values, metric.value)
    }
    assert.Contains(t, values, 123.0)
    assert.Contains(t, values, 456.0)
    assert.Contains(t, values, 321.0)
    assert.Contains(t, values, 789.0)
}

func TestGetPerCpuCoreScalingRatio(t *testing.T) {
    containerStats := &info.ContainerStats{
        Timestamp: time.Unix(1395066367, 0),
        PerfStats: []info.PerfStat{
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 1.0,
                    Value:        123,
                    Name:         "instructions",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.5,
                    Value:        456,
                    Name:         "instructions",
                },
                Cpu: 1,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.7,
                    Value:        321,
                    Name:         "instructions_retired",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.3,
                    Value:        789,
                    Name:         "instructions_retired",
                },
                Cpu: 1,
            },
        },
    }
    metricVals := getPerCPUCoreScalingRatio(containerStats)
    assert.Equal(t, 4, len(metricVals))
    values := []float64{}
    for _, metric := range metricVals {
        values = append(values, metric.value)
    }
    assert.Contains(t, values, 1.0)
    assert.Contains(t, values, 0.5)
    assert.Contains(t, values, 0.7)
    assert.Contains(t, values, 0.3)
}

func TestGetAggCorePerfEvents(t *testing.T) {
    containerStats := &info.ContainerStats{
        Timestamp: time.Unix(1395066367, 0),
        PerfStats: []info.PerfStat{
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 1.0,
                    Value:        123,
                    Name:         "instructions",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.5,
                    Value:        456,
                    Name:         "instructions",
                },
                Cpu: 1,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.7,
                    Value:        321,
                    Name:         "instructions_retired",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.3,
                    Value:        789,
                    Name:         "instructions_retired",
                },
                Cpu: 1,
            },
        },
    }
    metricVals := getAggregatedCorePerfEvents(containerStats)
    assert.Equal(t, 2, len(metricVals))
    values := []float64{}
    for _, metric := range metricVals {
        values = append(values, metric.value)
    }
    assert.Contains(t, values, 579.0)
    assert.Contains(t, values, 1110.0)
}

func TestGetMinCoreScalingRatio(t *testing.T) {
    containerStats := &info.ContainerStats{
        Timestamp: time.Unix(1395066367, 0),
        PerfStats: []info.PerfStat{
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 1.0,
                    Value:        123,
                    Name:         "instructions",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.5,
                    Value:        456,
                    Name:         "instructions",
                },
                Cpu: 1,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.7,
                    Value:        321,
                    Name:         "instructions_retired",
                },
                Cpu: 0,
            },
            {
                PerfValue: info.PerfValue{
                    ScalingRatio: 0.3,
                    Value:        789,
                    Name:         "instructions_retired",
                },
                Cpu: 1,
            },
        },
    }
    metricVals := getMinCoreScalingRatio(containerStats)
    assert.Equal(t, 2, len(metricVals))
    values := []float64{}
    for _, metric := range metricVals {
        values = append(values, metric.value)
    }
    assert.Contains(t, values, 0.5)
    assert.Contains(t, values, 0.3)
}
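To run the new tests locally, standard `go test` selection should work (package path assumed from the file locations in this PR):

```
go test ./metrics/ -run 'TestPrometheusCollectorWithPerfAggregated|TestGetAggCorePerfEvents|TestGetMinCoreScalingRatio'
```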