From e1584831b6835d3e8b9a239cb10503bcf6b9b9e8 Mon Sep 17 00:00:00 2001 From: Kailun Qin Date: Thu, 9 Sep 2021 18:29:31 -0400 Subject: [PATCH] libct/cg: add CFS bandwidth burst for CPU Burstable CFS controller is introduced in Linux 5.14. This helps with parallel workloads that might be bursty. They can get throttled even when their average utilization is under quota. And they may be latency sensitive at the same time so that throttling them is undesired. This feature borrows time now against the future underrun, at the cost of increased interference against the other system users, by introducing cfs_burst_us into CFS bandwidth control to enact the cap on unused bandwidth accumulation, which will then used additionally for burst. The patch adds the support/control for CFS bandwidth burst. runtime-spec: https://github.com/opencontainers/runtime-spec/pull/1120 Co-authored-by: Akihiro Suda Co-authored-by: Nadeshiko Manju Signed-off-by: Kailun Qin --- contrib/completions/bash/runc | 1 + docs/spec-conformance.md | 1 - libcontainer/cgroups/fs/cpu.go | 29 ++++++++++++++++++++++++++++ libcontainer/cgroups/fs/cpu_test.go | 12 ++++++++++++ libcontainer/cgroups/fs/fs.go | 2 +- libcontainer/cgroups/fs2/cpu.go | 27 +++++++++++++++++++++++++- libcontainer/configs/cgroup_linux.go | 3 +++ libcontainer/specconv/spec_linux.go | 3 +++ man/runc-update.8.md | 4 ++++ tests/integration/helpers.bash | 16 ++++++++++++++- tests/integration/update.bats | 6 ++++++ update.go | 8 ++++++++ 12 files changed, 108 insertions(+), 4 deletions(-) diff --git a/contrib/completions/bash/runc b/contrib/completions/bash/runc index 69a3b953750..782234e86d4 100644 --- a/contrib/completions/bash/runc +++ b/contrib/completions/bash/runc @@ -735,6 +735,7 @@ _runc_update() { --blkio-weight --cpu-period --cpu-quota + --cpu-burst --cpu-rt-period --cpu-rt-runtime --cpu-share diff --git a/docs/spec-conformance.md b/docs/spec-conformance.md index b512e5f5bb1..4ec89dcab4a 100644 --- a/docs/spec-conformance.md +++ b/docs/spec-conformance.md @@ -10,7 +10,6 @@ Spec version | Feature | PR v1.0.0 | `SCMP_ARCH_PARISC` | Unplanned, due to lack of users v1.0.0 | `SCMP_ARCH_PARISC64` | Unplanned, due to lack of users v1.0.2 | `.linux.personality` | [#3126](https://github.com/opencontainers/runc/pull/3126) -v1.1.0 | `.linux.resources.cpu.burst` | [#3749](https://github.com/opencontainers/runc/pull/3749) v1.1.0 | `SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV` | [#3862](https://github.com/opencontainers/runc/pull/3862) v1.1.0 | time namespaces | [#3876](https://github.com/opencontainers/runc/pull/3876) v1.1.0 | rsvd hugetlb cgroup | TODO ([#3859](https://github.com/opencontainers/runc/issues/3859)) diff --git a/libcontainer/cgroups/fs/cpu.go b/libcontainer/cgroups/fs/cpu.go index 26a9d7c0f5f..727f7f9184f 100644 --- a/libcontainer/cgroups/fs/cpu.go +++ b/libcontainer/cgroups/fs/cpu.go @@ -84,6 +84,28 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { period = "" } } + + var burst string + if r.CpuBurst != nil { + burst = strconv.FormatUint(*r.CpuBurst, 10) + if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { + // this is a special trick for burst feature, the current systemd and low version of kernel will not support it. + // So, an `no such file or directory` error would be raised, and we can ignore it . + if !errors.Is(err, unix.ENOENT) { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } + } else { + burst = "" + } + } if r.CpuQuota != 0 { if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { return err @@ -93,6 +115,13 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { return err } } + if burst != "" { + if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { + if !errors.Is(err, unix.ENOENT) { + return err + } + } + } } if r.CPUIdle != nil { diff --git a/libcontainer/cgroups/fs/cpu_test.go b/libcontainer/cgroups/fs/cpu_test.go index bbdd45a7118..4848b56a354 100644 --- a/libcontainer/cgroups/fs/cpu_test.go +++ b/libcontainer/cgroups/fs/cpu_test.go @@ -45,6 +45,7 @@ func TestCpuSetBandWidth(t *testing.T) { const ( quotaBefore = 8000 quotaAfter = 5000 + burstBefore = 2000 periodBefore = 10000 periodAfter = 7000 rtRuntimeBefore = 8000 @@ -52,9 +53,11 @@ func TestCpuSetBandWidth(t *testing.T) { rtPeriodBefore = 10000 rtPeriodAfter = 7000 ) + burstAfter := uint64(1000) writeFileContents(t, path, map[string]string{ "cpu.cfs_quota_us": strconv.Itoa(quotaBefore), + "cpu.cfs_burst_us": strconv.Itoa(burstBefore), "cpu.cfs_period_us": strconv.Itoa(periodBefore), "cpu.rt_runtime_us": strconv.Itoa(rtRuntimeBefore), "cpu.rt_period_us": strconv.Itoa(rtPeriodBefore), @@ -62,6 +65,7 @@ func TestCpuSetBandWidth(t *testing.T) { r := &configs.Resources{ CpuQuota: quotaAfter, + CpuBurst: &burstAfter, CpuPeriod: periodAfter, CpuRtRuntime: rtRuntimeAfter, CpuRtPeriod: rtPeriodAfter, @@ -79,6 +83,14 @@ func TestCpuSetBandWidth(t *testing.T) { t.Fatal("Got the wrong value, set cpu.cfs_quota_us failed.") } + burst, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_burst_us") + if err != nil { + t.Fatal(err) + } + if burst != burstAfter { + t.Fatal("Got the wrong value, set cpu.cfs_burst_us failed.") + } + period, err := fscommon.GetCgroupParamUint(path, "cpu.cfs_period_us") if err != nil { t.Fatal(err) diff --git a/libcontainer/cgroups/fs/fs.go b/libcontainer/cgroups/fs/fs.go index d2decb127ca..e2c425d0cd0 100644 --- a/libcontainer/cgroups/fs/fs.go +++ b/libcontainer/cgroups/fs/fs.go @@ -191,7 +191,7 @@ func (m *Manager) Set(r *configs.Resources) error { if path == "" { // We never created a path for this cgroup, so we cannot set // limits for it (though we have already tried at this point). - return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) + return fmt.Errorf("cannot set %s limit: container could not join or create cgroup, and the error is %w", sys.Name(), err) } return err } diff --git a/libcontainer/cgroups/fs2/cpu.go b/libcontainer/cgroups/fs2/cpu.go index 3f8deb0adbe..8ee49d499f1 100644 --- a/libcontainer/cgroups/fs2/cpu.go +++ b/libcontainer/cgroups/fs2/cpu.go @@ -2,16 +2,19 @@ package fs2 import ( "bufio" + "errors" "os" "strconv" + "golang.org/x/sys/unix" + "github.com/opencontainers/runc/libcontainer/cgroups" "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" "github.com/opencontainers/runc/libcontainer/configs" ) func isCpuSet(r *configs.Resources) bool { - return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil + return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 || r.CPUIdle != nil || r.CpuBurst != nil } func setCpu(dirPath string, r *configs.Resources) error { @@ -32,6 +35,23 @@ func setCpu(dirPath string, r *configs.Resources) error { } } + var burst string + if r.CpuBurst != nil { + burst = strconv.FormatUint(*r.CpuBurst, 10) + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + // Sometimes when the burst to be set is larger + // than the current one, it is rejected by the kernel + // (EINVAL) as old_quota/new_burst exceeds the parent + // cgroup quota limit. If this happens and the quota is + // going to be set, ignore the error for now and retry + // after setting the quota. + if !errors.Is(err, unix.EINVAL) || r.CpuQuota == 0 { + return err + } + } else { + burst = "" + } + } if r.CpuQuota != 0 || r.CpuPeriod != 0 { str := "max" if r.CpuQuota > 0 { @@ -47,6 +67,11 @@ func setCpu(dirPath string, r *configs.Resources) error { if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { return err } + if burst != "" { + if err := cgroups.WriteFile(dirPath, "cpu.max.burst", burst); err != nil { + return err + } + } } return nil diff --git a/libcontainer/configs/cgroup_linux.go b/libcontainer/configs/cgroup_linux.go index 1664304be21..4a34cf76fc5 100644 --- a/libcontainer/configs/cgroup_linux.go +++ b/libcontainer/configs/cgroup_linux.go @@ -69,6 +69,9 @@ type Resources struct { // CPU hardcap limit (in usecs). Allowed cpu time in a given period. CpuQuota int64 `json:"cpu_quota"` + // CPU hardcap burst limit (in usecs). Allowed accumulated cpu time additionally for burst in a given period. + CpuBurst *uint64 `json:"cpu_burst"` //nolint:revive + // CPU period to be used for hardcapping (in usecs). 0 to use system default. CpuPeriod uint64 `json:"cpu_period"` diff --git a/libcontainer/specconv/spec_linux.go b/libcontainer/specconv/spec_linux.go index 88f9c6777ef..818afcff59e 100644 --- a/libcontainer/specconv/spec_linux.go +++ b/libcontainer/specconv/spec_linux.go @@ -755,6 +755,9 @@ func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*confi if r.CPU.Quota != nil { c.Resources.CpuQuota = *r.CPU.Quota } + if r.CPU.Burst != nil { + c.Resources.CpuBurst = r.CPU.Burst + } if r.CPU.Period != nil { c.Resources.CpuPeriod = *r.CPU.Period } diff --git a/man/runc-update.8.md b/man/runc-update.8.md index 8ceaa386fe8..8ca69451ddd 100644 --- a/man/runc-update.8.md +++ b/man/runc-update.8.md @@ -28,6 +28,7 @@ In case **-r** is used, the JSON format is like this: "cpu": { "shares": 0, "quota": 0, + "burst": 0, "period": 0, "realtimeRuntime": 0, "realtimePeriod": 0, @@ -53,6 +54,9 @@ stdin. If this option is used, all other options are ignored. **--cpu-quota** _num_ : Set CPU usage limit within a given period (in microseconds). +**--cpu-burst** _num_ +: Set CPU burst limit within a given period (in microseconds). + **--cpu-rt-period** _num_ : Set CPU realtime period to be used for hardcapping (in microseconds). diff --git a/tests/integration/helpers.bash b/tests/integration/helpers.bash index 65912951d58..cd08fb2459f 100755 --- a/tests/integration/helpers.bash +++ b/tests/integration/helpers.bash @@ -260,11 +260,16 @@ function get_cgroup_value() { # Helper to check a if value in a cgroup file matches the expected one. function check_cgroup_value() { local current + local cgroup + cgroup="$(get_cgroup_path "$1")" + if [ ! -f "$cgroup/$1" ]; then + skip "$cgroup/$1 does not exist" + fi current="$(get_cgroup_value "$1")" local expected=$2 echo "current $current !? $expected" - [ "$current" = "$expected" ] + [ "$current" = "$expected" ] || [ "$current" = "$((expected / 1000))" ] } # Helper to check a value in systemd. @@ -310,6 +315,15 @@ function check_cpu_quota() { check_systemd_value "CPUQuotaPeriodUSec" $sd_period $sd_infinity } +function check_cpu_burst() { + local burst=$1 + if [ -v CGROUP_V2 ]; then + check_cgroup_value "cpu.max.burst" "$burst" + else + check_cgroup_value "cpu.cfs_burst_us" "$burst" + fi +} + # Works for cgroup v1 and v2, accepts v1 shares as an argument. function check_cpu_shares() { local shares=$1 diff --git a/tests/integration/update.bats b/tests/integration/update.bats index 5a87fa1500d..616fe809b9c 100644 --- a/tests/integration/update.bats +++ b/tests/integration/update.bats @@ -288,6 +288,12 @@ EOF runc update test_update --cpu-share 200 [ "$status" -eq 0 ] check_cpu_shares 200 + runc update test_update --cpu-period 900000 --cpu-burst 500000 + [ "$status" -eq 0 ] + check_cpu_burst 500000 + runc update test_update --cpu-period 900000 --cpu-burst 0 + [ "$status" -eq 0 ] + check_cpu_burst 0 # Revert to the test initial value via json on stding runc update -r - test_update <