From fc1a882a2c49c88613a2e35828aea98b136a0809 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Mon, 30 Oct 2023 18:09:07 -0700 Subject: [PATCH] runc kill: fix sending KILL to non-pidns container Commit f8ad20f made it impossible to kill leftover processes in a stopped container that does not have its own PID namespace. In other words, if a container init is gone, it is no longer possible to use `runc kill` to kill the leftover processes. Fix this by moving the check if container init exists to after the special case of handling the container without own PID namespace. While at it, fix the minor issue introduced by commit 9583b3d: if signalAllProcesses is used, there is no need to thaw the container (as freeze/thaw is either done in signalAllProcesses already, or not needed at all). Also, make signalAllProcesses return an error early if the container cgroup does not exist (as it relies on it to do its job). This way, the error message returned is more generic and easier to understand ("container not running" instead of "can't open file"). Finally, add a test case. Fixes: f8ad20f Fixes: 9583b3d Co-authored-by: lifubang Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 21 +++++++++++---------- libcontainer/init_linux.go | 3 +++ tests/integration/kill.bats | 31 ++++++++++++++++++++++++++++--- 3 files changed, 42 insertions(+), 13 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 3793b7fc339..df652200670 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -364,10 +364,6 @@ func (c *Container) start(process *Process) (retErr error) { func (c *Container) Signal(s os.Signal) error { c.m.Lock() defer c.m.Unlock() - // To avoid a PID reuse attack, don't kill non-running container. - if !c.hasInit() { - return ErrNotRunning - } // When a container has its own PID namespace, inside it the init PID // is 1, and thus it is handled specially by the kernel. In particular, @@ -375,14 +371,19 @@ func (c *Container) Signal(s os.Signal) error { // all other processes in that PID namespace (see pid_namespaces(7)). // // OTOH, if PID namespace is shared, we should kill all pids to avoid - // leftover processes. - var err error + // leftover processes. Handle this special case here. if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) { - err = signalAllProcesses(c.cgroupManager, unix.SIGKILL) - } else { - err = c.initProcess.signal(s) + if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { + return fmt.Errorf("unable to kill all processes: %w", err) + } + return nil } - if err != nil { + + // To avoid a PID reuse attack, don't kill non-running container. + if !c.hasInit() { + return ErrNotRunning + } + if err := c.initProcess.signal(s); err != nil { return fmt.Errorf("unable to signal init: %w", err) } if s == unix.SIGKILL { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 624e0199e9a..293f7e64115 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -661,6 +661,9 @@ func setupPersonality(config *configs.Config) error { // signalAllProcesses freezes then iterates over all the processes inside the // manager's cgroups sending the signal s to them. func signalAllProcesses(m cgroups.Manager, s unix.Signal) error { + if !m.Exists() { + return ErrNotRunning + } // Use cgroup.kill, if available. if s == unix.SIGKILL { if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid. diff --git a/tests/integration/kill.bats b/tests/integration/kill.bats index 8f2f4a7b4e8..5b61d67cd7e 100644 --- a/tests/integration/kill.bats +++ b/tests/integration/kill.bats @@ -36,14 +36,21 @@ function teardown() { } # This is roughly the same as TestPIDHostInitProcessWait in libcontainer/integration. -@test "kill KILL [host pidns]" { - # kill -a currently requires cgroup freezer. +# The differences are: +# +# 1. Here we use separate processes to create and to kill a container, so the +# processes inside a container are not children of "runc kill". +# +# 2. We hit different codepaths (nonChildProcess.signal rather than initProcess.signal). +# +# 3. (Optionally) tests the case of killing the container which init process is +# gone, to ensure the issue #4047 is fixed. +test_host_pidns_kill() { requires cgroups_freezer update_config ' .linux.namespaces -= [{"type": "pid"}]' set_cgroups_path if [ $EUID -ne 0 ]; then - # kill --all requires working cgroups. requires rootless_cgroup update_config ' .mounts |= map((select(.type == "proc") | .type = "none" @@ -61,6 +68,13 @@ function teardown() { [ "$status" -eq 0 ] done + if [ -v KILL_INIT ]; then + # Now kill the container's init process (sh). Since the container do + # not have own PID ns, its init is no special and the container will + # still be up and running. + __runc exec test_busybox killall -9 sh + fi + # Get the list of all container processes. path=$(get_cgroup_path "pids") pids=$(cat "$path"/cgroup.procs) @@ -82,3 +96,14 @@ function teardown() { [[ "$output" = *"No such process" ]] done } + +@test "kill KILL [host pidns]" { + unset KILL_INIT + test_host_pidns_kill +} + +@test "kill KILL [host pidns + init gone]" { + KILL_INIT=1 + test_host_pidns_kill + unset KILL_INIT +}