From 49edadde13c61941aa52ef7decf081b0c2eef0ea Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Fri, 12 May 2023 11:04:21 -0700 Subject: [PATCH] libct: signalAllProcesses: remove child reaping There are two very distinct usage scenarios for signalAllProcesses: * when used from the runc binary ("runc kill" command), the processes that it kills are not the children of "runc kill", and so calling wait(2) on each process is totally useless, as it will return ECHLD; * when used from a program that have created the container (such as libcontainer/integration test suite), that program can and should call wait(2), not the signalling code. So, the child reaping code is totally useless in the first case, and should be implemented by the program using libcontainer in the second case. Remove it, and add a proper documentation piece. Change the integration test accordingly. PS the first attempt to disable the child reaping code in signalAllProcesses was made in commit bb912eb00c51f, which used a questionable heuristic to figure out whether wait(2) should be called. This heuristic worked for a particular use case, but is not correct in general. While at it, simplify signalAllProcesses to use unix.Kill. Signed-off-by: Kir Kolyshkin --- libcontainer/container_linux.go | 12 +++- libcontainer/init_linux.go | 80 ++------------------------- libcontainer/integration/exec_test.go | 13 +++-- 3 files changed, 22 insertions(+), 83 deletions(-) diff --git a/libcontainer/container_linux.go b/libcontainer/container_linux.go index 3c0857d0604..73264bc90e1 100644 --- a/libcontainer/container_linux.go +++ b/libcontainer/container_linux.go @@ -357,6 +357,12 @@ func (c *Container) start(process *Process) (retErr error) { return nil } +// Signal sends a specified signal to container's init, or, if all is true, +// true, to all container's processes (as determined by container's cgroup). +// +// Setting all=true is useful when s is SIGKILL and the container does not have +// its own PID namespace. In this scenario, the libcontainer user may be required +// to implement a proper child reaper. func (c *Container) Signal(s os.Signal, all bool) error { c.m.Lock() defer c.m.Unlock() @@ -365,12 +371,16 @@ func (c *Container) Signal(s os.Signal, all bool) error { return err } if all { + sig, ok := s.(unix.Signal) + if !ok { + return errors.New("unsupported signal type") + } if status == Stopped && !c.cgroupManager.Exists() { // Avoid calling signalAllProcesses which may print // a warning trying to freeze a non-existing cgroup. return nil } - return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s)) + return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, sig)) } // to avoid a PID reuse attack if status == Running || status == Created || status == Paused { diff --git a/libcontainer/init_linux.go b/libcontainer/init_linux.go index 6a88f1b211c..2ea1f2b1433 100644 --- a/libcontainer/init_linux.go +++ b/libcontainer/init_linux.go @@ -9,7 +9,6 @@ import ( "net" "os" "strings" - "unsafe" "github.com/containerd/console" "github.com/opencontainers/runtime-spec/specs-go" @@ -496,39 +495,9 @@ func setupRlimits(limits []configs.Rlimit, pid int) error { return nil } -const _P_PID = 1 - -//nolint:structcheck,unused -type siginfo struct { - si_signo int32 - si_errno int32 - si_code int32 - // below here is a union; si_pid is the only field we use - si_pid int32 - // Pad to 128 bytes as detailed in blockUntilWaitable - pad [96]byte -} - -// isWaitable returns true if the process has exited false otherwise. -// Its based off blockUntilWaitable in src/os/wait_waitid.go -func isWaitable(pid int) (bool, error) { - si := &siginfo{} - _, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0) - if e != 0 { - return false, &os.SyscallError{Syscall: "waitid", Err: e} - } - - return si.si_pid != 0, nil -} - // signalAllProcesses freezes then iterates over all the processes inside the // manager's cgroups sending the signal s to them. -// If s is SIGKILL and subreaper is not enabled then it will wait for each -// process to exit. -// For all other signals it will check if the process is ready to report its -// exit status and only if it is will a wait be performed. -func signalAllProcesses(m cgroups.Manager, s os.Signal) error { - var procs []*os.Process +func signalAllProcesses(m cgroups.Manager, s unix.Signal) error { if err := m.Freeze(configs.Frozen); err != nil { logrus.Warn(err) } @@ -540,55 +509,14 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error { return err } for _, pid := range pids { - p, err := os.FindProcess(pid) - if err != nil { - logrus.Warn(err) - continue - } - procs = append(procs, p) - if err := p.Signal(s); err != nil { - logrus.Warn(err) + err := unix.Kill(pid, s) + if err != unix.ESRCH { + logrus.Warnf("kill %d: %v", pid, err) } } if err := m.Freeze(configs.Thawed); err != nil { logrus.Warn(err) } - subreaper, err := system.GetSubreaper() - if err != nil { - // The error here means that PR_GET_CHILD_SUBREAPER is not - // supported because this code might run on a kernel older - // than 3.4. We don't want to throw an error in that case, - // and we simplify things, considering there is no subreaper - // set. - subreaper = 0 - } - - for _, p := range procs { - if s != unix.SIGKILL { - if ok, err := isWaitable(p.Pid); err != nil { - if !errors.Is(err, unix.ECHILD) { - logrus.Warn("signalAllProcesses: ", p.Pid, err) - } - continue - } else if !ok { - // Not ready to report so don't wait - continue - } - } - - // In case a subreaper has been setup, this code must not - // wait for the process. Otherwise, we cannot be sure the - // current process will be reaped by the subreaper, while - // the subreaper might be waiting for this process in order - // to retrieve its exit code. - if subreaper == 0 { - if _, err := p.Wait(); err != nil { - if !errors.Is(err, unix.ECHILD) { - logrus.Warn("wait: ", err) - } - } - } - } return nil } diff --git a/libcontainer/integration/exec_test.go b/libcontainer/integration/exec_test.go index 530faf1aeaa..376dd0f1f3b 100644 --- a/libcontainer/integration/exec_test.go +++ b/libcontainer/integration/exec_test.go @@ -1373,7 +1373,7 @@ func TestPIDHostInitProcessWait(t *testing.T) { process1 := &libcontainer.Process{ Cwd: "/", - Args: []string{"sleep", "100"}, + Args: []string{"sleep", "1h"}, Env: standardEnvironment, Init: true, } @@ -1382,7 +1382,7 @@ func TestPIDHostInitProcessWait(t *testing.T) { process2 := &libcontainer.Process{ Cwd: "/", - Args: []string{"sleep", "100"}, + Args: []string{"sleep", "1h"}, Env: standardEnvironment, Init: false, } @@ -1397,10 +1397,11 @@ func TestPIDHostInitProcessWait(t *testing.T) { t.Fatal("expected Wait to indicate failure") } - // The non-init process must've been killed. - err = process2.Signal(syscall.Signal(0)) - if err == nil || err.Error() != "no such process" { - t.Fatalf("expected process to have been killed: %v", err) + // The non-init process must've also been killed. If not, + // the test will time out. + _, err = process2.Wait() + if err == nil { + t.Fatal("expected Wait to indicate failure") } }