Skip to content

Commit

Permalink
Merge commit from fork
Browse files Browse the repository at this point in the history
[1.1] rootfs: try to scope MkdirAll to stay inside the rootfs
  • Loading branch information
cyphar committed Sep 3, 2024
2 parents 6419fba + f0b652e commit a86c3d8
Show file tree
Hide file tree
Showing 4 changed files with 263 additions and 112 deletions.
28 changes: 6 additions & 22 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1276,8 +1276,7 @@ func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts
// restore using CRIU. This function is inspired from the code in
// rootfs_linux.go
func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
switch m.Device {
case "cgroup":
if m.Device == "cgroup" {
// No mount point(s) need to be created:
//
// * for v1, mount points are saved by CRIU because
Expand All @@ -1286,26 +1285,11 @@ func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error {
// * for v2, /sys/fs/cgroup is a real mount, but
// the mountpoint appears as soon as /sys is mounted
return nil
case "bind":
// The prepareBindMount() function checks if source
// exists. So it cannot be used for other filesystem types.
// TODO: pass something else than nil? Not sure if criu is
// impacted by issue #2484
if err := prepareBindMount(m, c.config.Rootfs, nil); err != nil {
return err
}
default:
// for all other filesystems just create the mountpoints
dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination)
if err != nil {
return err
}
if err := checkProcMount(c.config.Rootfs, dest, m, ""); err != nil {
return err
}
if err := os.MkdirAll(dest, 0o755); err != nil {
return err
}
}
// TODO: pass something else than nil? Not sure if criu is
// impacted by issue #2484
if _, err := createMountpoint(c.config.Rootfs, m, nil, ""); err != nil {
return fmt.Errorf("create criu restore mount for %s mount: %w", m.Destination, err)
}
return nil
}
Expand Down
177 changes: 87 additions & 90 deletions libcontainer/rootfs_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,36 +224,6 @@ func mountCmd(cmd configs.Command) error {
return nil
}

func prepareBindMount(m *configs.Mount, rootfs string, mountFd *int) error {
source := m.Source
if mountFd != nil {
source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
}

stat, err := os.Stat(source)
if err != nil {
// error out if the source of a bind mount does not exist as we will be
// unable to bind anything to it.
return err
}
// ensure that the destination of the bind mount is resolved of symlinks at mount time because
// any previous mounts can invalidate the next mount's destination.
// this can happen when a user specifies mounts within other mounts to cause breakouts or other
// evil stuff to try to escape the container's rootfs.
var dest string
if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil {
return err
}
if err := checkProcMount(rootfs, dest, m, source); err != nil {
return err
}
if err := createIfNotExists(dest, stat.IsDir()); err != nil {
return err
}

return nil
}

func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
binds, err := getCgroupMounts(m)
if err != nil {
Expand Down Expand Up @@ -282,7 +252,8 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
for _, b := range binds {
if c.cgroupns {
subsystemPath := filepath.Join(c.root, b.Destination)
if err := os.MkdirAll(subsystemPath, 0o755); err != nil {
subsystemName := filepath.Base(b.Destination)
if err := utils.MkdirAllInRoot(c.root, subsystemPath, 0o755); err != nil {
return err
}
if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error {
Expand All @@ -292,7 +263,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
}
var (
source = "cgroup"
data = filepath.Base(subsystemPath)
data = subsystemName
)
if data == "systemd" {
data = cgroups.CgroupNamePrefix + data
Expand Down Expand Up @@ -322,14 +293,7 @@ func mountCgroupV1(m *configs.Mount, c *mountConfig) error {
}

func mountCgroupV2(m *configs.Mount, c *mountConfig) error {
dest, err := securejoin.SecureJoin(c.root, m.Destination)
if err != nil {
return err
}
if err := os.MkdirAll(dest, 0o755); err != nil {
return err
}
err = utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
err := utils.WithProcfd(c.root, m.Destination, func(procfd string) error {
return mount(m.Source, m.Destination, procfd, "cgroup2", uintptr(m.Flags), m.Data)
})
if err == nil || !(errors.Is(err, unix.EPERM) || errors.Is(err, unix.EBUSY)) {
Expand Down Expand Up @@ -411,6 +375,81 @@ func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) {
})
}

var errRootfsToFile = errors.New("config tries to change rootfs to file")

func createMountpoint(rootfs string, m *configs.Mount, mountFd *int, source string) (string, error) {
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
if err != nil {
return "", err
}
if err := checkProcMount(rootfs, dest, m, source); err != nil {
return "", fmt.Errorf("check proc-safety of %s mount: %w", m.Destination, err)
}

switch m.Device {
case "bind":
source := m.Source
if mountFd != nil {
source = "/proc/self/fd/" + strconv.Itoa(*mountFd)
}

fi, err := os.Stat(source)
if err != nil {
// Error out if the source of a bind mount does not exist as we
// will be unable to bind anything to it.
return "", fmt.Errorf("bind mount source stat: %w", err)
}
// If the original source is not a directory, make the target a file.
if !fi.IsDir() {
// Make sure we aren't tricked into trying to make the root a file.
if rootfs == dest {
return "", fmt.Errorf("%w: file bind mount over rootfs", errRootfsToFile)
}
// Make the parent directory.
destDir, destBase := filepath.Split(dest)
destDirFd, err := utils.MkdirAllInRootOpen(rootfs, destDir, 0o755)
if err != nil {
return "", fmt.Errorf("make parent dir of file bind-mount: %w", err)
}
defer destDirFd.Close()
// Make the target file. We want to avoid opening any file that is
// already there because it could be a "bad" file like an invalid
// device or hung tty that might cause a DoS, so we use mknodat.
// destBase does not contain any "/" components, and mknodat does
// not follow trailing symlinks, so we can safely just call mknodat
// here.
if err := unix.Mknodat(int(destDirFd.Fd()), destBase, unix.S_IFREG|0o644, 0); err != nil {
// If we get EEXIST, there was already an inode there and
// we can consider that a success.
if !errors.Is(err, unix.EEXIST) {
err = &os.PathError{Op: "mknod regular file", Path: dest, Err: err}
return "", fmt.Errorf("create target of file bind-mount: %w", err)
}
}
// Nothing left to do.
return dest, nil
}

case "tmpfs":
// If the original target exists, copy the mode for the tmpfs mount.
if stat, err := os.Stat(dest); err == nil {
dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode()))
if m.Data != "" {
dt = dt + "," + m.Data
}
m.Data = dt

// Nothing left to do.
return dest, nil
}
}

if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
return "", err
}
return dest, nil
}

func mountToRootfs(m *configs.Mount, c *mountConfig) error {
rootfs := c.root

Expand All @@ -435,53 +474,34 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
} else if !fi.IsDir() {
return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device)
}
if err := os.MkdirAll(dest, 0o755); err != nil {
if err := utils.MkdirAllInRoot(rootfs, dest, 0o755); err != nil {
return err
}
// Selinux kernels do not support labeling of /proc or /sys.
return mountPropagate(m, rootfs, "", nil)
}

mountLabel := c.label
mountFd := c.fd
dest, err := securejoin.SecureJoin(rootfs, m.Destination)
dest, err := createMountpoint(rootfs, m, mountFd, m.Source)
if err != nil {
return err
return fmt.Errorf("create mount destination for %s mount: %w", m.Destination, err)
}
mountLabel := c.label

switch m.Device {
case "mqueue":
if err := os.MkdirAll(dest, 0o755); err != nil {
return err
}
if err := mountPropagate(m, rootfs, "", nil); err != nil {
return err
}
return label.SetFileLabel(dest, mountLabel)
case "tmpfs":
if stat, err := os.Stat(dest); err != nil {
if err := os.MkdirAll(dest, 0o755); err != nil {
return err
}
} else {
dt := fmt.Sprintf("mode=%04o", syscallMode(stat.Mode()))
if m.Data != "" {
dt = dt + "," + m.Data
}
m.Data = dt
}

if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP {
err = doTmpfsCopyUp(m, rootfs, mountLabel)
} else {
err = mountPropagate(m, rootfs, mountLabel, nil)
}

return err
case "bind":
if err := prepareBindMount(m, rootfs, mountFd); err != nil {
return err
}
if err := mountPropagate(m, rootfs, mountLabel, mountFd); err != nil {
return err
}
Expand Down Expand Up @@ -509,12 +529,6 @@ func mountToRootfs(m *configs.Mount, c *mountConfig) error {
}
return mountCgroupV1(m, c)
default:
if err := checkProcMount(rootfs, dest, m, m.Source); err != nil {
return err
}
if err := os.MkdirAll(dest, 0o755); err != nil {
return err
}
return mountPropagate(m, rootfs, mountLabel, mountFd)
}
if err := setRecAttr(m, rootfs); err != nil {
Expand Down Expand Up @@ -745,7 +759,10 @@ func createDeviceNode(rootfs string, node *devices.Device, bind bool) error {
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil {
if dest == rootfs {
return fmt.Errorf("%w: mknod over rootfs", errRootfsToFile)
}
if err := utils.MkdirAllInRoot(rootfs, filepath.Dir(dest), 0o755); err != nil {
return err
}
if bind {
Expand Down Expand Up @@ -1011,26 +1028,6 @@ func chroot() error {
return nil
}

// createIfNotExists creates a file or a directory only if it does not already exist.
func createIfNotExists(path string, isDir bool) error {
if _, err := os.Stat(path); err != nil {
if os.IsNotExist(err) {
if isDir {
return os.MkdirAll(path, 0o755)
}
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
return err
}
f, err := os.OpenFile(path, os.O_CREATE, 0o755)
if err != nil {
return err
}
_ = f.Close()
}
}
return nil
}

// readonlyPath will make a path read only.
func readonlyPath(path string) error {
if err := mount(path, path, "", "", unix.MS_BIND|unix.MS_REC, ""); err != nil {
Expand Down
41 changes: 41 additions & 0 deletions libcontainer/system/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ package system
import (
"os"
"os/exec"
"runtime"
"strings"
"unsafe"

"golang.org/x/sys/unix"
Expand Down Expand Up @@ -102,3 +104,42 @@ func GetSubreaper() (int, error) {

return int(i), nil
}

func prepareAt(dir *os.File, path string) (int, string) {
if dir == nil {
return unix.AT_FDCWD, path
}

// Rather than just filepath.Join-ing path here, do it manually so the
// error and handle correctly indicate cases like path=".." as being
// relative to the correct directory. The handle.Name() might end up being
// wrong but because this is (currently) only used in MkdirAllInRoot, that
// isn't a problem.
dirName := dir.Name()
if !strings.HasSuffix(dirName, "/") {
dirName += "/"
}
fullPath := dirName + path

return int(dir.Fd()), fullPath
}

func Openat(dir *os.File, path string, flags int, mode uint32) (*os.File, error) {
dirFd, fullPath := prepareAt(dir, path)
fd, err := unix.Openat(dirFd, path, flags, mode)
if err != nil {
return nil, &os.PathError{Op: "openat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return os.NewFile(uintptr(fd), fullPath), nil
}

func Mkdirat(dir *os.File, path string, mode uint32) error {
dirFd, fullPath := prepareAt(dir, path)
err := unix.Mkdirat(dirFd, path, mode)
if err != nil {
err = &os.PathError{Op: "mkdirat", Path: fullPath, Err: err}
}
runtime.KeepAlive(dir)
return err
}
Loading

0 comments on commit a86c3d8

Please sign in to comment.