From 34f1d3bac2981efc484c396e84cc11665e674dd6 Mon Sep 17 00:00:00 2001 From: lifubang Date: Fri, 11 Oct 2024 09:22:48 +0800 Subject: [PATCH 1/2] join the cgroup after the initial setup finished We should join the cgroup after the initial setup has finished, but before runc init clones new child processes. (#4427) Because we should try our best to reduce the influence of memory cgroup accounting from all runc init processes before we start the container init process. Signed-off-by: lifubang --- libcontainer/process_linux.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/libcontainer/process_linux.go b/libcontainer/process_linux.go index ac3b104ea02..822edefc9bb 100644 --- a/libcontainer/process_linux.go +++ b/libcontainer/process_linux.go @@ -407,6 +407,13 @@ func (p *initProcess) start() (retErr error) { } }() + // We should join the cgroup after the initial setup finished, + // but before runc init clone new children processes. (#4427) + err = <-waitInit + if err != nil { + return err + } + // Do this before syncing with child so that no children can escape the // cgroup. We don't need to worry about not doing this and not being root // because we'd be using the rootless cgroup manager in that case. @@ -421,10 +428,6 @@ func (p *initProcess) start() (retErr error) { if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil { return fmt.Errorf("can't copy bootstrap data to pipe: %w", err) } - err = <-waitInit - if err != nil { - return err - } childPid, err := p.getChildPid() if err != nil { From e52d0d1a42cc024dfeed0bf0afb1fccf8512bdac Mon Sep 17 00:00:00 2001 From: lifubang Date: Tue, 15 Oct 2024 17:52:56 +0800 Subject: [PATCH 2/2] Revert #4423 "increase memory.max in cgroups.bats" As we will fix the race between binary clone and cgroup join, we can eliminate the impacts of memory accounting from ensure_clone_binary. So runc will support lower memory usage, the same as before. 
This reverts commit 719e2bc2c37625bd5a51a6090eda75974fa54779. Signed-off-by: lifubang --- tests/integration/cgroups.bats | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/integration/cgroups.bats b/tests/integration/cgroups.bats index 11a3ff967ae..810eaf8a074 100644 --- a/tests/integration/cgroups.bats +++ b/tests/integration/cgroups.bats @@ -259,7 +259,7 @@ convert_hugetlb_size() { "memory.min": "131072", "memory.low": "524288", "memory.high": "5242880", - "memory.max": "20484096", + "memory.max": "10485760", "memory.swap.max": "20971520", "pids.max": "99", "cpu.max": "10000 100000", @@ -276,7 +276,7 @@ convert_hugetlb_size() { echo "$output" | grep -q '^memory.min:131072$' echo "$output" | grep -q '^memory.low:524288$' echo "$output" | grep -q '^memory.high:5242880$' - echo "$output" | grep -q '^memory.max:20484096$' + echo "$output" | grep -q '^memory.max:10485760$' echo "$output" | grep -q '^memory.swap.max:20971520$' echo "$output" | grep -q '^pids.max:99$' echo "$output" | grep -q '^cpu.max:10000 100000$' @@ -284,7 +284,7 @@ convert_hugetlb_size() { check_systemd_value "MemoryMin" 131072 check_systemd_value "MemoryLow" 524288 check_systemd_value "MemoryHigh" 5242880 - check_systemd_value "MemoryMax" 20484096 + check_systemd_value "MemoryMax" 10485760 check_systemd_value "MemorySwapMax" 20971520 check_systemd_value "TasksMax" 99 check_cpu_quota 10000 100000 "100ms" @@ -304,7 +304,7 @@ convert_hugetlb_size() { } | .linux.resources.unified |= { "memory.min": "131072", - "memory.max": "40484864", + "memory.max": "10485760", "pids.max": "42", "cpu.max": "5000 50000", "cpu.weight": "42" @@ -319,7 +319,7 @@ convert_hugetlb_size() { runc exec test_cgroups_unified cat /sys/fs/cgroup/memory.max [ "$status" -eq 0 ] - [ "$output" = '40484864' ] + [ "$output" = '10485760' ] runc exec test_cgroups_unified cat /sys/fs/cgroup/pids.max [ "$status" -eq 0 ]