Skip to content

Commit

Permalink
merge pr #3825
Browse files Browse the repository at this point in the history
Kir Kolyshkin (7):
  libct: implement support for cgroup.kill
  runc kill: drop -a option
  libct: move killing logic to container.Signal
  libct: fix shared pidns detection
  libct: signalAllProcesses: remove child reaping
  tests/int/kill: add kill -a with host pidns test
  tests/rootless.sh: drop set -x

LGTMs: AkihiroSuda cyphar
Closes #3825
  • Loading branch information
cyphar committed Jun 10, 2023
2 parents 5f3f559 + 7d09ba1 commit 14456ef
Show file tree
Hide file tree
Showing 11 changed files with 154 additions and 146 deletions.
4 changes: 2 additions & 2 deletions delete.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ import (
)

func killContainer(container *libcontainer.Container) error {
_ = container.Signal(unix.SIGKILL, false)
_ = container.Signal(unix.SIGKILL)
for i := 0; i < 100; i++ {
time.Sleep(100 * time.Millisecond)
if err := container.Signal(unix.Signal(0), false); err != nil {
if err := container.Signal(unix.Signal(0)); err != nil {
destroy(container)
return nil
}
Expand Down
13 changes: 10 additions & 3 deletions kill.go
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
package main

import (
"errors"
"fmt"
"strconv"
"strings"

"github.com/opencontainers/runc/libcontainer"
"github.com/urfave/cli"
"golang.org/x/sys/unix"
)
Expand All @@ -24,8 +26,9 @@ signal to the init process of the "ubuntu01" container:
# runc kill ubuntu01 KILL`,
Flags: []cli.Flag{
cli.BoolFlag{
Name: "all, a",
Usage: "send the specified signal to all processes inside the container",
Name: "all, a",
Usage: "(obsoleted, do not use)",
Hidden: true,
},
},
Action: func(context *cli.Context) error {
Expand All @@ -49,7 +52,11 @@ signal to the init process of the "ubuntu01" container:
if err != nil {
return err
}
return container.Signal(signal, context.Bool("all"))
err = container.Signal(signal)
if errors.Is(err, libcontainer.ErrNotRunning) && context.Bool("all") {
err = nil
}
return err
},
}

Expand Down
12 changes: 12 additions & 0 deletions libcontainer/configs/namespaces_syscall.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,15 @@ func (n *Namespaces) CloneFlags() uintptr {
}
return uintptr(flag)
}

// IsPrivate tells whether the namespace of type t is configured as private
// (i.e. it exists and is not shared).
func (n Namespaces) IsPrivate(t NamespaceType) bool {
for _, v := range n {
if v.Type == t {
return v.Path == ""
}
}
// Not found, so implicitly sharing a parent namespace.
return false
}
63 changes: 35 additions & 28 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -357,37 +357,47 @@ func (c *Container) start(process *Process) (retErr error) {
return nil
}

func (c *Container) Signal(s os.Signal, all bool) error {
// Signal sends a specified signal to container's init.
//
// When s is SIGKILL and the container does not have its own PID namespace, all
// the container's processes are killed. In this scenario, the libcontainer
// user may be required to implement a proper child reaper.
func (c *Container) Signal(s os.Signal) error {
c.m.Lock()
defer c.m.Unlock()
status, err := c.currentStatus()
if err != nil {
return err
}
if all {
if status == Stopped && !c.cgroupManager.Exists() {
// Avoid calling signalAllProcesses which may print
// a warning trying to freeze a non-existing cgroup.
return nil
}
return c.ignoreCgroupError(signalAllProcesses(c.cgroupManager, s))
}
// to avoid a PID reuse attack
if status == Running || status == Created || status == Paused {
if err := c.initProcess.signal(s); err != nil {
return fmt.Errorf("unable to signal init: %w", err)
}
if status == Paused {
// For cgroup v1, killing a process in a frozen cgroup
// does nothing until it's thawed. Only thaw the cgroup
// for SIGKILL.
if s, ok := s.(unix.Signal); ok && s == unix.SIGKILL {
_ = c.cgroupManager.Freeze(configs.Thawed)
}
}
return nil
// To avoid a PID reuse attack, don't kill non-running container.
switch status {
case Running, Created, Paused:
default:
return ErrNotRunning
}
return ErrNotRunning

// When a container has its own PID namespace, inside it the init PID
// is 1, and thus it is handled specially by the kernel. In particular,
// killing init with SIGKILL from an ancestor namespace will also kill
// all other processes in that PID namespace (see pid_namespaces(7)).
//
// OTOH, if PID namespace is shared, we should kill all pids to avoid
// leftover processes.
if s == unix.SIGKILL && !c.config.Namespaces.IsPrivate(configs.NEWPID) {
err = signalAllProcesses(c.cgroupManager, unix.SIGKILL)
} else {
err = c.initProcess.signal(s)
}
if err != nil {
return fmt.Errorf("unable to signal init: %w", err)
}
if status == Paused && s == unix.SIGKILL {
// For cgroup v1, killing a process in a frozen cgroup
// does nothing until it's thawed. Only thaw the cgroup
// for SIGKILL.
_ = c.cgroupManager.Freeze(configs.Thawed)
}
return nil
}

func (c *Container) createExecFifo() error {
Expand Down Expand Up @@ -539,7 +549,6 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
nsMaps[ns.Type] = ns.Path
}
}
_, sharePidns := nsMaps[configs.NEWPID]
data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps, initStandard)
if err != nil {
return nil, err
Expand Down Expand Up @@ -584,7 +593,6 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, l
container: c,
process: p,
bootstrapData: data,
sharePidns: sharePidns,
}
c.initProcess = init
return init, nil
Expand Down Expand Up @@ -687,8 +695,7 @@ func (c *Container) newInitConfig(process *Process) *initConfig {
return cfg
}

// Destroy destroys the container, if its in a valid state, after killing any
// remaining running processes.
// Destroy destroys the container, if its in a valid state.
//
// Any event registrations are removed before the container is destroyed.
// No error is returned if the container is already destroyed.
Expand Down
91 changes: 15 additions & 76 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
"runtime/debug"
"strconv"
"strings"
"unsafe"

"github.com/containerd/console"
"github.com/opencontainers/runtime-spec/specs-go"
Expand Down Expand Up @@ -575,39 +574,20 @@ func setupRlimits(limits []configs.Rlimit, pid int) error {
return nil
}

const _P_PID = 1

//nolint:structcheck,unused
type siginfo struct {
si_signo int32
si_errno int32
si_code int32
// below here is a union; si_pid is the only field we use
si_pid int32
// Pad to 128 bytes as detailed in blockUntilWaitable
pad [96]byte
}

// isWaitable returns true if the process has exited false otherwise.
// Its based off blockUntilWaitable in src/os/wait_waitid.go
func isWaitable(pid int) (bool, error) {
si := &siginfo{}
_, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0)
if e != 0 {
return false, &os.SyscallError{Syscall: "waitid", Err: e}
}

return si.si_pid != 0, nil
}

// signalAllProcesses freezes then iterates over all the processes inside the
// manager's cgroups sending the signal s to them.
// If s is SIGKILL and subreaper is not enabled then it will wait for each
// process to exit.
// For all other signals it will check if the process is ready to report its
// exit status and only if it is will a wait be performed.
func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
var procs []*os.Process
func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {
// Use cgroup.kill, if available.
if s == unix.SIGKILL {
if p := m.Path(""); p != "" { // Either cgroup v2 or hybrid.
err := cgroups.WriteFile(p, "cgroup.kill", "1")
if err == nil || !errors.Is(err, os.ErrNotExist) {
return err
}
// Fallback to old implementation.
}
}

if err := m.Freeze(configs.Frozen); err != nil {
logrus.Warn(err)
}
Expand All @@ -619,55 +599,14 @@ func signalAllProcesses(m cgroups.Manager, s os.Signal) error {
return err
}
for _, pid := range pids {
p, err := os.FindProcess(pid)
if err != nil {
logrus.Warn(err)
continue
}
procs = append(procs, p)
if err := p.Signal(s); err != nil {
logrus.Warn(err)
err := unix.Kill(pid, s)
if err != nil && err != unix.ESRCH { //nolint:errorlint // unix errors are bare
logrus.Warnf("kill %d: %v", pid, err)
}
}
if err := m.Freeze(configs.Thawed); err != nil {
logrus.Warn(err)
}

subreaper, err := system.GetSubreaper()
if err != nil {
// The error here means that PR_GET_CHILD_SUBREAPER is not
// supported because this code might run on a kernel older
// than 3.4. We don't want to throw an error in that case,
// and we simplify things, considering there is no subreaper
// set.
subreaper = 0
}

for _, p := range procs {
if s != unix.SIGKILL {
if ok, err := isWaitable(p.Pid); err != nil {
if !errors.Is(err, unix.ECHILD) {
logrus.Warn("signalAllProcesses: ", p.Pid, err)
}
continue
} else if !ok {
// Not ready to report so don't wait
continue
}
}

// In case a subreaper has been setup, this code must not
// wait for the process. Otherwise, we cannot be sure the
// current process will be reaped by the subreaper, while
// the subreaper might be waiting for this process in order
// to retrieve its exit code.
if subreaper == 0 {
if _, err := p.Wait(); err != nil {
if !errors.Is(err, unix.ECHILD) {
logrus.Warn("wait: ", err)
}
}
}
}
return nil
}
37 changes: 24 additions & 13 deletions libcontainer/integration/exec_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1355,16 +1355,26 @@ func TestPIDHost(t *testing.T) {
}
}

func TestPIDHostInitProcessWait(t *testing.T) {
func TestHostPidnsInitKill(t *testing.T) {
config := newTemplateConfig(t, nil)
// Implicitly use host pid ns.
config.Namespaces.Remove(configs.NEWPID)
testPidnsInitKill(t, config)
}

func TestSharedPidnsInitKill(t *testing.T) {
config := newTemplateConfig(t, nil)
// Explicitly use host pid ns.
config.Namespaces.Add(configs.NEWPID, "/proc/1/ns/pid")
testPidnsInitKill(t, config)
}

func testPidnsInitKill(t *testing.T, config *configs.Config) {
if testing.Short() {
return
}

pidns := "/proc/1/ns/pid"

// Run a container with two long-running processes.
config := newTemplateConfig(t, nil)
config.Namespaces.Add(configs.NEWPID, pidns)
container, err := newContainer(t, config)
ok(t, err)
defer func() {
Expand All @@ -1373,7 +1383,7 @@ func TestPIDHostInitProcessWait(t *testing.T) {

process1 := &libcontainer.Process{
Cwd: "/",
Args: []string{"sleep", "100"},
Args: []string{"sleep", "1h"},
Env: standardEnvironment,
Init: true,
}
Expand All @@ -1382,25 +1392,26 @@ func TestPIDHostInitProcessWait(t *testing.T) {

process2 := &libcontainer.Process{
Cwd: "/",
Args: []string{"sleep", "100"},
Args: []string{"sleep", "1h"},
Env: standardEnvironment,
Init: false,
}
err = container.Run(process2)
ok(t, err)

// Kill the init process and Wait for it.
err = process1.Signal(syscall.SIGKILL)
// Kill the container.
err = container.Signal(syscall.SIGKILL)
ok(t, err)
_, err = process1.Wait()
if err == nil {
t.Fatal("expected Wait to indicate failure")
}

// The non-init process must've been killed.
err = process2.Signal(syscall.Signal(0))
if err == nil || err.Error() != "no such process" {
t.Fatalf("expected process to have been killed: %v", err)
// The non-init process must've also been killed. If not,
// the test will time out.
_, err = process2.Wait()
if err == nil {
t.Fatal("expected Wait to indicate failure")
}
}

Expand Down
5 changes: 0 additions & 5 deletions libcontainer/process_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,6 @@ type initProcess struct {
fds []string
process *Process
bootstrapData io.Reader
sharePidns bool
}

func (p *initProcess) pid() int {
Expand Down Expand Up @@ -616,10 +615,6 @@ func (p *initProcess) start() (retErr error) {

func (p *initProcess) wait() (*os.ProcessState, error) {
err := p.cmd.Wait()
// we should kill all processes in cgroup when init is died if we use host PID namespace
if p.sharePidns {
_ = signalAllProcesses(p.manager, unix.SIGKILL)
}
return p.cmd.ProcessState, err
}

Expand Down
7 changes: 0 additions & 7 deletions libcontainer/state_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import (

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"golang.org/x/sys/unix"
)

Expand Down Expand Up @@ -36,12 +35,6 @@ type containerState interface {
}

func destroy(c *Container) error {
if !c.config.Namespaces.Contains(configs.NEWPID) ||
c.config.Namespaces.PathOf(configs.NEWPID) != "" {
if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil {
logrus.Warn(err)
}
}
err := c.cgroupManager.Destroy()
if c.intelRdtManager != nil {
if ierr := c.intelRdtManager.Destroy(); err == nil {
Expand Down
Loading

0 comments on commit 14456ef

Please sign in to comment.