Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[backport] chown cgroup to process uid in container namespace #3311

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
package systemd

import (
"bufio"
"fmt"
"math"
"os"
"path/filepath"
"strconv"
"strings"
Expand Down Expand Up @@ -291,9 +293,46 @@ func (m *unifiedManager) Apply(pid int) error {
if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil {
return err
}

if c.OwnerUID != nil {
filesToChown, err := cgroupFilesToChown()
if err != nil {
return err
}

for _, v := range filesToChown {
err := os.Chown(m.path+"/"+v, *c.OwnerUID, -1)
if err != nil {
return err
}
}
}

return nil
}

// cgroupFilesToChown lists the entries, relative to a cgroup directory, whose
// ownership has to be handed to the delegate uid. The first entry is always
// "." so that the directory itself is chowned.
//
// The remaining names are read, one per line, from
// /sys/kernel/cgroup/delegate. When that file cannot be opened (it does not
// exist on Linux < 4.15), the initial set documented in cgroups(7) is used
// instead. An error is returned only if reading the opened file fails.
func cgroupFilesToChown() ([]string, error) {
	const cgroupDelegateFile = "/sys/kernel/cgroup/delegate"

	delegate, openErr := os.Open(cgroupDelegateFile)
	if openErr != nil {
		// No usable delegate list from the kernel: fall back to the
		// static defaults, still led by the directory itself.
		return []string{".", "cgroup.procs", "cgroup.subtree_control", "cgroup.threads"}, nil
	}
	defer delegate.Close()

	// Start with the cgroup directory, then add every line of the file.
	names := []string{"."}
	lines := bufio.NewScanner(delegate)
	for lines.Scan() {
		names = append(names, lines.Text())
	}
	if scanErr := lines.Err(); scanErr != nil {
		return nil, fmt.Errorf("error reading %s: %w", cgroupDelegateFile, scanErr)
	}
	return names, nil
}

func (m *unifiedManager) Destroy() error {
if m.cgroups.Paths != nil {
return nil
Expand Down
6 changes: 6 additions & 0 deletions libcontainer/configs/cgroup_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ type Cgroup struct {
// derived from org.systemd.property.xxx annotations.
// Ignored unless systemd is used for managing cgroups.
SystemdProps []systemdDbus.Property `json:"-"`
// The host UID that should own the cgroup, or nil to accept
// the default ownership. This should only be set when the
// cgroupfs is to be mounted read/write.
// Not all cgroup manager implementations support changing
// the ownership.
OwnerUID *int `json:"owner_uid,omitempty"`
}

type Resources struct {
Expand Down
43 changes: 43 additions & 0 deletions libcontainer/specconv/spec_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,49 @@ func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) {
}
}
}

// Set the host UID that should own the container's cgroup.
// This must be performed after setupUserNamespace, so that
// config.HostRootUID() returns the correct result.
//
// Only set it if the container will have its own cgroup
// namespace and the cgroupfs will be mounted read/write.
//
hasCgroupNS := config.Namespaces.Contains(configs.NEWCGROUP) && config.Namespaces.PathOf(configs.NEWCGROUP) == ""
hasRwCgroupfs := false
if hasCgroupNS {
for _, m := range config.Mounts {
if m.Source == "cgroup" && filepath.Clean(m.Destination) == "/sys/fs/cgroup" && (m.Flags&unix.MS_RDONLY) == 0 {
hasRwCgroupfs = true
break
}
}
}
processUid := 0
if spec.Process != nil {
// Chown the cgroup to the UID running the process,
// which is not necessarily UID 0 in the container
// namespace (e.g., an unprivileged UID in the host
// user namespace).
processUid = int(spec.Process.User.UID)
}
if hasCgroupNS && hasRwCgroupfs {
ownerUid, err := config.HostUID(processUid)
// There are two error cases; we can ignore both.
//
// 1. uidMappings is unset. Either there is no user
// namespace (fine), or it is an error (which is
// checked elsewhere).
//
// 2. The user is unmapped in the user namespace. This is an
// unusual configuration and might be an error. But it too
// will be checked elsewhere, so we can ignore it here.
//
if err == nil {
config.Cgroups.OwnerUID = &ownerUid
}
}

if spec.Process != nil {
config.OomScoreAdj = spec.Process.OOMScoreAdj
config.NoNewPrivileges = spec.Process.NoNewPrivileges
Expand Down
61 changes: 61 additions & 0 deletions tests/integration/cgroup_delegation.bats
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env bats

load helpers

# bats hook executed after every test case in this file.
# It delegates all cleanup to teardown_bundle, which is not defined here
# (presumably provided by the "helpers" file loaded above; verify there).
function teardown() {
teardown_bundle
}

# bats hook executed before every test case in this file.
# Prepares a busybox bundle and rewrites its config so the container runs in
# a user namespace where container IDs 0..65535 map to host IDs starting at
# 100000 (for both uids and gids).
function setup() {
# NOTE(review): "requires" is defined elsewhere; presumably it skips the
# test unless running as root on a cgroup v2 host with systemd. Confirm.
requires root cgroups_v2 systemd

setup_busybox

# chown test temp dir to allow host user to read it
chown 100000 "$ROOT"

# chown rootfs to allow host user to mkdir mount points
chown 100000 "$ROOT"/bundle/rootfs

set_cgroups_path

# configure a user namespace
# (the argument is a jq-style filter passed to the update_config helper;
# host ID 100000 matches the owner given to $ROOT and the rootfs above)
update_config ' .linux.namespaces += [{"type": "user"}]
| .linux.uidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}]
| .linux.gidMappings += [{"hostID": 100000, "containerID": 0, "size": 65536}]
'
}

# Negative case 1: the bundle config is used exactly as setup() left it
# (per the test name: read-only cgroupfs, new cgroup namespace).
# Expectation: the cgroup is NOT chowned, so /sys/fs/cgroup inside the
# container shows up as owned by "nobody".
@test "runc exec (cgroup v2, ro cgroupfs, new cgroupns) does not chown cgroup" {
# Start the container detached; the checks below rely on the runc helper
# having set $status (and $output) for the preceding invocation.
runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroup_chown
[ "$status" -eq 0 ]

# Print the owning user name of the cgroup mount point as seen in the container.
runc exec test_cgroup_chown sh -c "stat -c %U /sys/fs/cgroup"
[ "$status" -eq 0 ]
[ "$output" = "nobody" ] # /sys/fs/cgroup owned by unmapped user
}

# Negative case 2: the cgroup mount is made writable, but the container
# inherits the cgroup namespace instead of getting its own.
# Expectation: the cgroup is NOT chowned, so /sys/fs/cgroup inside the
# container shows up as owned by "nobody".
@test "runc exec (cgroup v2, rw cgroupfs, inh cgroupns) does not chown cgroup" {
# NOTE(review): helper defined elsewhere; presumably switches the cgroup
# mount in the bundle config to read/write. Confirm.
set_cgroup_mount_writable

# inherit cgroup namespace (remove cgroup from namespaces list)
update_config '.linux.namespaces |= map(select(.type != "cgroup"))'

# Start the container detached and require a successful start.
runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroup_chown
[ "$status" -eq 0 ]

# Print the owning user name of the cgroup mount point as seen in the container.
runc exec test_cgroup_chown sh -c "stat -c %U /sys/fs/cgroup"
[ "$status" -eq 0 ]
[ "$output" = "nobody" ] # /sys/fs/cgroup owned by unmapped user
}

# Positive case: the cgroup mount is made writable and the namespaces list
# from setup() is left untouched (per the test name: new cgroup namespace).
# Expectation: the cgroup IS chowned, so /sys/fs/cgroup inside the container
# is owned by "root", i.e. uid 0 of the container's user namespace.
@test "runc exec (cgroup v2, rw cgroupfs, new cgroupns) does chown cgroup" {
# NOTE(review): helper defined elsewhere; presumably switches the cgroup
# mount in the bundle config to read/write. Confirm.
set_cgroup_mount_writable

# Start the container detached and require a successful start.
runc run -d --console-socket "$CONSOLE_SOCKET" test_cgroup_chown
[ "$status" -eq 0 ]

# Print the owning user name of the cgroup mount point as seen in the container.
runc exec test_cgroup_chown sh -c "stat -c %U /sys/fs/cgroup"
[ "$status" -eq 0 ]
[ "$output" = "root" ] # /sys/fs/cgroup owned by root (of user namespace)
}