Skip to content

Commit

Permalink
libct/cg/stats: support PSI for cgroup v2
Browse files Browse the repository at this point in the history
We read output from the following files if they exist:
- cpu.pressure
- memory.pressure
- io.pressure

Each is in the following format:

```
some avg10=0.00 avg60=0.00 avg300=0.00 total=0
full avg10=0.00 avg60=0.00 avg300=0.00 total=0
```

Signed-off-by: Daniel Dao <dqminh89@gmail.com>
Co-authored-by: Sandor Szücs <sandor.szuecs@zalando.de>
Co-authored-by: Kir Kolyshkin <kolyshkin@gmail.com>
Signed-off-by: Kir Kolyshkin <kolyshkin@gmail.com>
  • Loading branch information
3 people committed Jun 13, 2023
1 parent 2f42992 commit 1aa7ca8
Show file tree
Hide file tree
Showing 8 changed files with 212 additions and 1 deletion.
3 changes: 3 additions & 0 deletions events.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.CPU.Throttling.Periods = cg.CpuStats.ThrottlingData.Periods
s.CPU.Throttling.ThrottledPeriods = cg.CpuStats.ThrottlingData.ThrottledPeriods
s.CPU.Throttling.ThrottledTime = cg.CpuStats.ThrottlingData.ThrottledTime
s.CPU.PSI = cg.CpuStats.PSI

s.CPUSet = types.CPUSet(cg.CPUSetStats)

Expand All @@ -138,6 +139,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Memory.Swap = convertMemoryEntry(cg.MemoryStats.SwapUsage)
s.Memory.Usage = convertMemoryEntry(cg.MemoryStats.Usage)
s.Memory.Raw = cg.MemoryStats.Stats
s.Memory.PSI = cg.MemoryStats.PSI

s.Blkio.IoServiceBytesRecursive = convertBlkioEntry(cg.BlkioStats.IoServiceBytesRecursive)
s.Blkio.IoServicedRecursive = convertBlkioEntry(cg.BlkioStats.IoServicedRecursive)
Expand All @@ -147,6 +149,7 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
s.Blkio.IoMergedRecursive = convertBlkioEntry(cg.BlkioStats.IoMergedRecursive)
s.Blkio.IoTimeRecursive = convertBlkioEntry(cg.BlkioStats.IoTimeRecursive)
s.Blkio.SectorsRecursive = convertBlkioEntry(cg.BlkioStats.SectorsRecursive)
s.Blkio.PSI = cg.BlkioStats.PSI

s.Hugetlb = make(map[string]types.Hugetlb)
for k, v := range cg.HugetlbStats {
Expand Down
11 changes: 11 additions & 0 deletions libcontainer/cgroups/fs2/fs2.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,17 @@ func (m *Manager) GetStats() (*cgroups.Stats, error) {
if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
}
// PSI (since kernel 4.20).
var err error
if st.CpuStats.PSI, err = statPSI(m.dirPath, "cpu.pressure"); err != nil {
errs = append(errs, err)
}
if st.MemoryStats.PSI, err = statPSI(m.dirPath, "memory.pressure"); err != nil {
errs = append(errs, err)
}
if st.BlkioStats.PSI, err = statPSI(m.dirPath, "io.pressure"); err != nil {
errs = append(errs, err)
}
// hugetlb (since kernel 5.6)
if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) {
errs = append(errs, err)
Expand Down
89 changes: 89 additions & 0 deletions libcontainer/cgroups/fs2/psi.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
package fs2

import (
"bufio"
"errors"
"fmt"
"os"
"strconv"
"strings"

"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// statPSI reads the cgroup v2 pressure file named file (for example
// "cpu.pressure") from the cgroup directory dirPath and parses its "some"
// and "full" lines into a PSIStats.
//
// It returns (nil, nil) when PSI data is unavailable: either the file does
// not exist, or reading it fails with ENOTSUP. Any other error from opening
// the file is returned as is; read and parse failures are wrapped in a
// *parseError carrying dirPath and file.
//
// Blank lines and lines starting with anything other than "some" or "full"
// are ignored.
func statPSI(dirPath string, file string) (*cgroups.PSIStats, error) {
	f, err := cgroups.OpenFile(dirPath, file, os.O_RDONLY)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) {
			// Kernel < 4.20, or CONFIG_PSI is not set,
			// or PSI stats are turned off for the cgroup
			// ("echo 0 > cgroup.pressure", kernel >= 6.1).
			return nil, nil
		}
		return nil, err
	}
	defer f.Close()

	var psistats cgroups.PSIStats
	sc := bufio.NewScanner(f)
	for sc.Scan() {
		parts := strings.Fields(sc.Text())
		if len(parts) == 0 {
			// strings.Fields yields no elements for a blank or
			// whitespace-only line; skip it rather than panic on
			// parts[0] below.
			continue
		}
		var pv *cgroups.PSIData
		switch parts[0] {
		case "some":
			pv = &psistats.Some
		case "full":
			pv = &psistats.Full
		}
		if pv != nil {
			*pv, err = parsePSIData(parts[1:])
			if err != nil {
				return nil, &parseError{Path: dirPath, File: file, Err: err}
			}
		}
	}
	if err := sc.Err(); err != nil {
		if errors.Is(err, unix.ENOTSUP) {
			// Some kernels (e.g. CS9) may return ENOTSUP on read
			// if psi=1 kernel cmdline parameter is required.
			return nil, nil
		}
		return nil, &parseError{Path: dirPath, File: file, Err: err}
	}
	return &psistats, nil
}

// parsePSIData converts the "key=value" fields of a single pressure line
// (everything after the leading "some"/"full" word) into a PSIData.
//
// The avg10, avg60 and avg300 keys are parsed as 64-bit floats and total as
// an unsigned 64-bit decimal integer. Keys other than these four are
// ignored. A field without "=" or a value that fails to parse yields an
// error; the PSIData returned alongside an error holds only the fields
// parsed so far.
func parsePSIData(psi []string) (cgroups.PSIData, error) {
	var data cgroups.PSIData
	for _, field := range psi {
		pair := strings.SplitN(field, "=", 2)
		if len(pair) != 2 {
			return data, fmt.Errorf("invalid psi data: %q", field)
		}
		key, val := pair[0], pair[1]

		// total is the only integer-valued key; handle it up front.
		if key == "total" {
			total, err := strconv.ParseUint(val, 10, 64)
			if err != nil {
				return data, fmt.Errorf("invalid %s PSI value: %w", key, err)
			}
			data.Total = total
			continue
		}

		// The remaining known keys are all float averages.
		var dst *float64
		switch key {
		case "avg10":
			dst = &data.Avg10
		case "avg60":
			dst = &data.Avg60
		case "avg300":
			dst = &data.Avg300
		default:
			// Unknown key: skip without error.
			continue
		}
		avg, err := strconv.ParseFloat(val, 64)
		if err != nil {
			return data, fmt.Errorf("invalid %s PSI value: %w", key, err)
		}
		*dst = avg
	}
	return data, nil
}
47 changes: 47 additions & 0 deletions libcontainer/cgroups/fs2/psi_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package fs2

import (
"os"
"path/filepath"
"reflect"
"testing"

"github.com/opencontainers/runc/libcontainer/cgroups"
)

// TestStatCPUPSI writes a sample cpu.pressure file into a temporary
// directory and checks that statPSI parses both the "some" and the "full"
// line into the expected PSIStats.
func TestStatCPUPSI(t *testing.T) {
	const examplePSIData = `some avg10=1.71 avg60=2.36 avg300=2.57 total=230548833
full avg10=1.00 avg60=1.01 avg300=1.00 total=157622356`

	// The values below mirror examplePSIData line by line.
	want := cgroups.PSIStats{
		Some: cgroups.PSIData{
			Avg10:  1.71,
			Avg60:  2.36,
			Avg300: 2.57,
			Total:  230548833,
		},
		Full: cgroups.PSIData{
			Avg10:  1.00,
			Avg60:  1.01,
			Avg300: 1.00,
			Total:  157622356,
		},
	}

	// We're using a fake cgroupfs.
	cgroups.TestMode = true

	dir := t.TempDir()
	err := os.WriteFile(filepath.Join(dir, "cpu.pressure"), []byte(examplePSIData), 0o644)
	if err != nil {
		t.Fatal(err)
	}

	st, err := statPSI(dir, "cpu.pressure")
	if err != nil {
		t.Fatal(err)
	}

	if !reflect.DeepEqual(*st, want) {
		t.Errorf("unexpected PSI result: %+v", st)
	}
}
15 changes: 15 additions & 0 deletions libcontainer/cgroups/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,22 @@ type CpuUsage struct {
UsageInUsermode uint64 `json:"usage_in_usermode"`
}

// PSIData holds the values of one line ("some" or "full") of a cgroup v2
// pressure file, which has the form
//
//	some avg10=0.00 avg60=0.00 avg300=0.00 total=0
//
// Each field carries the value of the key of the same name.
// NOTE(review): per the kernel PSI documentation the avg* values are
// presumably stall-time percentages over 10/60/300 second windows and
// total is cumulative stall time in microseconds -- confirm before
// relying on units.
type PSIData struct {
Avg10 float64 `json:"avg10"`
Avg60 float64 `json:"avg60"`
Avg300 float64 `json:"avg300"`
Total uint64 `json:"total"`
}

// PSIStats holds the parsed contents of a cgroup v2 pressure file
// (cpu.pressure, memory.pressure or io.pressure): one PSIData for the
// "some" line and one for the "full" line.
//
// NOTE(review): encoding/json's omitempty option has no effect on
// struct-valued fields, so both keys are always emitted when a PSIStats
// is marshaled.
type PSIStats struct {
Some PSIData `json:"some,omitempty"`
Full PSIData `json:"full,omitempty"`
}

// CpuStats groups the CPU-related statistics of a cgroup.
type CpuStats struct {
CpuUsage CpuUsage `json:"cpu_usage,omitempty"`
ThrottlingData ThrottlingData `json:"throttling_data,omitempty"`
// PSI is the pressure stall information read from cpu.pressure by the
// cgroup v2 manager. It is nil (and omitted from JSON) when that data
// is not available.
PSI *PSIStats `json:"psi,omitempty"`
}

type CPUSetStats struct {
Expand Down Expand Up @@ -89,6 +102,7 @@ type MemoryStats struct {
UseHierarchy bool `json:"use_hierarchy"`

Stats map[string]uint64 `json:"stats,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type PageUsageByNUMA struct {
Expand Down Expand Up @@ -133,6 +147,7 @@ type BlkioStats struct {
IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"`
IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"`
SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type HugetlbStats struct {
Expand Down
29 changes: 29 additions & 0 deletions tests/integration/events.bats
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,35 @@ function teardown() {
[[ "${lines[0]}" == *"data"* ]]
}

# shellcheck disable=SC2030
@test "events --stats with psi data" {
# Only runs as root on a cgroup v2 host where the PSI files are readable;
# otherwise the test is skipped.
requires root cgroups_v2 psi
init_cgroup_paths

# Give the container a CPU quota of 1000.
# NOTE(review): presumably this small quota is meant to get the workload
# below throttled so that CPU pressure becomes non-zero -- confirm.
update_config '.linux.resources.cpu |= { "quota": 1000 }'

runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]

# Stress the CPU a bit. Need something that runs for more than 10s.
runc exec test_busybox dd if=/dev/zero bs=1 count=128K of=/dev/null
[ "$status" -eq 0 ]

# Dump the raw pressure files into the test log; the exit status of this
# command is not checked.
runc exec test_busybox sh -c 'tail /sys/fs/cgroup/*.pressure'

runc events --stats test_busybox
[ "$status" -eq 0 ]

# Check PSI metrics.
# The first jq call only prints the parsed CPU PSI object for the log.
jq '.data.cpu.psi' <<<"${lines[0]}"
for psi_type in some full; do
for psi_metric in avg10 avg60 avg300 total; do
echo -n "checking .data.cpu.psi.$psi_type.$psi_metric != 0: "
# jq -e exits non-zero when the expression evaluates to false or null,
# which fails the test if the metric is 0 or missing.
jq -e '.data.cpu.psi.'$psi_type.$psi_metric' != 0' <<<"${lines[0]}"
done
done
}

function test_events() {
# XXX: currently cgroups require root containers.
requires root
Expand Down
7 changes: 7 additions & 0 deletions tests/integration/helpers.bash
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,13 @@ function requires() {
skip_me=1
fi
;;
psi)
# If PSI is not compiled in the kernel, the file will not exist.
# If PSI is compiled, but not enabled, read will fail with ENOTSUPP.
if ! cat /sys/fs/cgroup/cpu.pressure &>/dev/null; then
skip_me=1
fi
;;
*)
fail "BUG: Invalid requires $var."
;;
Expand Down
12 changes: 11 additions & 1 deletion types/events.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
package types

import "github.com/opencontainers/runc/libcontainer/intelrdt"
import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/intelrdt"
)

// Event struct for encoding the event data to json.
type Event struct {
Expand All @@ -21,6 +24,10 @@ type Stats struct {
NetworkInterfaces []*NetworkInterface `json:"network_interfaces"`
}

// PSIData is an alias for cgroups.PSIData, so the events output uses the
// same type (and JSON field names) as the cgroups stats.
type PSIData = cgroups.PSIData

// PSIStats is an alias for cgroups.PSIStats, so the events output uses the
// same type (and JSON field names) as the cgroups stats.
type PSIStats = cgroups.PSIStats

type Hugetlb struct {
Usage uint64 `json:"usage,omitempty"`
Max uint64 `json:"max,omitempty"`
Expand All @@ -43,6 +50,7 @@ type Blkio struct {
IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"`
IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"`
SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type Pids struct {
Expand All @@ -69,6 +77,7 @@ type CpuUsage struct {
// Cpu is the CPU section of the stats encoded in an event.
type Cpu struct {
Usage CpuUsage `json:"usage,omitempty"`
Throttling Throttling `json:"throttling,omitempty"`
// PSI is the CPU pressure stall information; being a pointer with
// omitempty, it is left out of the JSON output when nil.
PSI *PSIStats `json:"psi,omitempty"`
}

type CPUSet struct {
Expand Down Expand Up @@ -99,6 +108,7 @@ type Memory struct {
Kernel MemoryEntry `json:"kernel,omitempty"`
KernelTCP MemoryEntry `json:"kernelTCP,omitempty"`
Raw map[string]uint64 `json:"raw,omitempty"`
PSI *PSIStats `json:"psi,omitempty"`
}

type L3CacheInfo struct {
Expand Down

0 comments on commit 1aa7ca8

Please sign in to comment.