Skip to content

Commit

Permalink
use runc-dmz to defeat CVE-2019-5736
Browse files Browse the repository at this point in the history
Signed-off-by: lifubang <lifubang@acmcoder.com>
  • Loading branch information
lifubang committed Aug 15, 2023
1 parent 6eae7ce commit ada7109
Show file tree
Hide file tree
Showing 6 changed files with 78 additions and 173 deletions.
1 change: 1 addition & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -492,6 +492,7 @@ func (c *Container) commandTemplate(p *Process, childInitPipe *os.File, childLog
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
"_LIBCONTAINER_STATEDIR="+c.root,
"_LIBCONTAINER_DMZFD=-1",
)

cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe)
Expand Down
13 changes: 11 additions & 2 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,12 @@ func startInitialization() (retErr error) {
return err
}

// Get the runc-dmz fd from the environment.
dmzFd, err := strconv.Atoi(os.Getenv("_LIBCONTAINER_DMZFD"))
if err != nil {
return fmt.Errorf("dmzFd error: %w", err)
}

// clear the current process's environment to clean any libcontainer
// specific env vars.
os.Clearenv()
Expand All @@ -201,17 +207,18 @@ func startInitialization() (retErr error) {
}()

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, pipe, consoleSocket, fifofd, logFD, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
return containerInit(it, pipe, consoleSocket, fifofd, logFD, dmzFd, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
}

func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int, mountFds mountFds) error {
func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd, dmzFd int, mountFds mountFds) error {
var config *initConfig
if err := json.NewDecoder(pipe).Decode(&config); err != nil {
return err
}
if err := populateProcessEnvironment(config.Env); err != nil {
return err
}

switch t {
case initSetns:
// mount and idmap fds must be nil in this case. We don't mount while doing runc exec.
Expand All @@ -224,6 +231,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo
consoleSocket: consoleSocket,
config: config,
logFd: logFd,
dmzFd: dmzFd,
}
return i.Init()
case initStandard:
Expand All @@ -233,6 +241,7 @@ func containerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, lo
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
dmzFd: dmzFd,
logFd: logFd,
mountFds: mountFds,
}
Expand Down
185 changes: 16 additions & 169 deletions libcontainer/nsenter/cloned_binary.c
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ int memfd_create(const char *name, unsigned int flags)
#endif

#define CLONED_BINARY_ENV "_LIBCONTAINER_CLONED_BINARY"
#define RUNC_MEMFD_COMMENT "runc_cloned:/proc/self/exe"
#define RUNC_MEMFD_COMMENT "runc_cloned:runc-dmz"
/*
* There are newer memfd seals (such as F_SEAL_FUTURE_WRITE and F_SEAL_EXEC),
* which we use opportunistically. However, this set is the original set of
Expand All @@ -142,162 +142,6 @@ int memfd_create(const char *name, unsigned int flags)
#define RUNC_MEMFD_MIN_SEALS \
(F_SEAL_SEAL | F_SEAL_SHRINK | F_SEAL_GROW | F_SEAL_WRITE)

/*
 * realloc(3) wrapper that never returns NULL: the allocation is retried until
 * it succeeds, so callers do not need to check the result.
 *
 * A zero @size is bumped to one byte. realloc(ptr, 0) is permitted to free
 * @ptr and return NULL; retrying in that case would hand the already-freed
 * pointer back to realloc().
 */
static void *must_realloc(void *ptr, size_t size)
{
	void *old = ptr;

	if (size == 0)
		size = 1;

	do {
		ptr = realloc(old, size);
	} while (!ptr);
	return ptr;
}

/*
* Verify whether we are currently in a self-cloned program (namely, is
* /proc/self/exe a memfd). F_GET_SEALS will only succeed for memfds (or rather
* for shmem files), and we want to be sure it's actually sealed.
*
* Returns -ENOTRECOVERABLE if /proc/self/exe cannot be opened, 0 if none of
* the checks below identify the running binary as a protected copy, and a
* non-zero value if at least one of them does.
*/
static int is_self_cloned(void)
{
int fd, seals = 0, is_cloned = false;
struct stat statbuf = { };
struct statfs fsbuf = { };

/* All checks below are made against this fd, not against a path. */
fd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
if (fd < 0) {
write_log(ERROR, "cannot open runc binary for reading: open /proc/self/exe: %m");
return -ENOTRECOVERABLE;
}

/*
* Is the binary a fully-sealed memfd? We don't need CLONED_BINARY_ENV for
* this, because you cannot write to a sealed memfd no matter what.
*/
seals = fcntl(fd, F_GET_SEALS);
/* A negative result falls through to the weaker checks further down. */
if (seals >= 0) {
write_log(DEBUG, "checking /proc/self/exe memfd seals: 0x%x", seals);
/* Every seal in RUNC_MEMFD_MIN_SEALS must be present, not just some. */
is_cloned = (seals & RUNC_MEMFD_MIN_SEALS) == RUNC_MEMFD_MIN_SEALS;
if (is_cloned)
goto out;
}

/*
* All other forms require CLONED_BINARY_ENV, since they are potentially
* writeable (or we can't tell if they're fully safe) and thus we must
* check the environment as an extra layer of defence.
*/
if (!getenv(CLONED_BINARY_ENV)) {
is_cloned = false;
goto out;
}

/*
* Is the binary on a read-only filesystem? We can't detect bind-mounts in
* particular (in-kernel they are identical to regular mounts) but we can
* at least be sure that it's read-only. In addition, to make sure that
* it's *our* bind-mount we check CLONED_BINARY_ENV.
*/
/* A failing fstatfs() is ignored: the check simply contributes nothing. */
if (fstatfs(fd, &fsbuf) >= 0)
is_cloned |= (fsbuf.f_flags & MS_RDONLY);

/*
* Okay, we're a tmpfile -- or we're currently running on RHEL <=7.6
* which appears to have a borked backport of F_GET_SEALS. Either way,
* having a file which has no hardlinks indicates that we aren't using
* a host-side "runc" binary and this is something that a container
* cannot fake (because unlinking requires being able to resolve the
* path that you want to unlink).
*/
/* As above, a failing fstat() is ignored rather than treated as fatal. */
if (fstat(fd, &statbuf) >= 0)
is_cloned |= (statbuf.st_nlink == 0);

out:
/* Single exit point so the fd is closed on every path after open(). */
close(fd);
return is_cloned;
}

/*
 * Slurp the whole of @path into a freshly allocated buffer and store the
 * number of bytes read in @length.
 *
 * Returns the buffer (owned by the caller, release with free()), or NULL if
 * @length is NULL, the file cannot be opened, a read fails, or the file
 * yields no data at all. The buffer is not NUL-terminated by this function.
 */
static char *read_file(char *path, size_t *length)
{
	char chunk[4096];
	char *contents = NULL;
	ssize_t nread;
	int fd;

	if (length == NULL)
		return NULL;

	fd = open(path, O_RDONLY | O_CLOEXEC);
	if (fd < 0)
		return NULL;

	*length = 0;
	/* Grow the result by exactly the amount each read() returned. */
	while ((nread = read(fd, chunk, sizeof(chunk))) > 0) {
		contents = must_realloc(contents, (*length + nread) * sizeof(*contents));
		memcpy(contents + *length, chunk, nread);
		*length += nread;
	}

	/* A negative count means read() failed: drop whatever was gathered. */
	if (nread < 0) {
		free(contents);
		contents = NULL;
	}

	close(fd);
	return contents;
}

/*
 * A poor-man's version of "xargs -0". Basically parses a given block of
 * NUL-delimited data, within the given length and adds a pointer to each entry
 * to the array of pointers.
 *
 * @data:        start of the NUL-delimited block; entries are not copied, so
 *               the resulting pointers alias @data and it must outlive them.
 *               The final entry must itself be NUL-terminated, since entry
 *               boundaries are found with strlen().
 * @data_length: number of bytes of @data to consider.
 * @output:      must point to a NULL pointer on entry; on success it receives
 *               a newly allocated, NULL-terminated array of entry pointers.
 *
 * Returns the number of entries found (0 for an empty block, in which case
 * *@output is still a valid array holding only the NULL terminator), or -1 if
 * the arguments are invalid.
 */
static int parse_xargs(char *data, int data_length, char ***output)
{
	int num = 0;
	char *cur = data;

	if (!data || !output || *output != NULL)
		return -1;

	/*
	 * Allocate the terminator slot up front so that an empty block (no loop
	 * iterations) does not leave *output NULL when the terminator is written.
	 */
	*output = must_realloc(NULL, sizeof(**output));

	while (cur < data + data_length) {
		num++;
		*output = must_realloc(*output, (num + 1) * sizeof(**output));
		(*output)[num - 1] = cur;
		cur += strlen(cur) + 1;
	}
	(*output)[num] = NULL;
	return num;
}

/*
 * Reconstruct argv by reading and splitting /proc/self/cmdline.
 * There is no main() in this context to hand us the arguments, so the
 * process's own command line is the only source for them.
 *
 * On success returns 0 and *@argv points at a NULL-terminated array whose
 * entries alias the command-line buffer (which is therefore deliberately not
 * freed). Returns -EINVAL if the command line cannot be read or yields no
 * arguments.
 */
static int fetchve(char ***argv)
{
	size_t raw_len;
	char *raw = read_file("/proc/self/cmdline", &raw_len);

	if (raw != NULL && parse_xargs(raw, raw_len, argv) > 0)
		return 0;

	/* free(NULL) is a no-op, so this covers the failed-read case too. */
	free(raw);
	return -EINVAL;
}

enum {
EFD_NONE = 0,
EFD_MEMFD,
Expand Down Expand Up @@ -499,12 +343,20 @@ static int clone_binary(void)
struct stat statbuf = { };
size_t sent = 0;
int fdtype = EFD_NONE;
char runcpath[PATH_MAX] = { 0 };
char dmzpath[PATH_MAX] = { 0 };

execfd = make_execfd(&fdtype);
if (execfd < 0 || fdtype == EFD_NONE)
return -ENOTRECOVERABLE;

binfd = open("/proc/self/exe", O_RDONLY | O_CLOEXEC);
if (readlink("/proc/self/exe", runcpath, PATH_MAX) < 1)
goto error;

if (snprintf(dmzpath, PATH_MAX, "%s%s", runcpath, "-dmz") < 0)
goto error;

binfd = open(dmzpath, O_RDONLY | O_CLOEXEC);
if (binfd < 0)
goto error;

Expand Down Expand Up @@ -543,24 +395,19 @@ extern char **environ;
int ensure_cloned_binary(void)
{
int execfd;
char **argv = NULL;

/* Check that we're not self-cloned, and if we are then bail. */
int cloned = is_self_cloned();
if (cloned > 0 || cloned == -ENOTRECOVERABLE)
return cloned;

if (fetchve(&argv) < 0)
return -EINVAL;

execfd = clone_binary();
if (execfd < 0)
return -EIO;

if (putenv(CLONED_BINARY_ENV "=1"))
char envString[PATH_MAX] = { 0 };
if (sprintf(envString, "%d", execfd) < 0)
goto error;

if (setenv("_LIBCONTAINER_DMZFD", envString, 1))
goto error;

fexecve(execfd, argv, environ);
return 0;
error:
close(execfd);
return -ENOEXEC;
Expand Down
10 changes: 9 additions & 1 deletion libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"errors"
"fmt"
"os"
"os/exec"
"strconv"

"github.com/opencontainers/selinux/go-selinux"
Expand All @@ -23,6 +24,7 @@ type linuxSetnsInit struct {
consoleSocket *os.File
config *initConfig
logFd int
dmzFd int
}

func (l *linuxSetnsInit) getSessionRingName() string {
Expand Down Expand Up @@ -100,10 +102,16 @@ func (l *linuxSetnsInit) Init() error {
}
}
logrus.Debugf("setns_init: about to exec")

// Close the log pipe fd so the parent's ForwardLogs can exit.
if err := unix.Close(l.logFd); err != nil {
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
}

return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ())
entryPoint, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
dmzArgs := []string{entryPoint}
return system.Fexecve(uintptr(l.dmzFd), append(dmzArgs, l.config.Args[1:]...), os.Environ())
}
8 changes: 7 additions & 1 deletion libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type linuxStandardInit struct {
consoleSocket *os.File
parentPid int
fifoFd int
dmzFd int
logFd int
mountFds mountFds
config *initConfig
Expand Down Expand Up @@ -262,5 +263,10 @@ func (l *linuxStandardInit) Init() error {
return err
}

return system.Exec(name, l.config.Args[0:], os.Environ())
entryPoint, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
dmzArgs := []string{entryPoint}
return system.Fexecve(uintptr(l.dmzFd), append(dmzArgs, l.config.Args[1:]...), os.Environ())
}
34 changes: 34 additions & 0 deletions libcontainer/system/linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package system
import (
"os"
"os/exec"
"syscall"
"unsafe"

"golang.org/x/sys/unix"
Expand Down Expand Up @@ -49,6 +50,39 @@ func Exec(cmd string, args []string, env []string) error {
}
}

// Execveat invokes the execveat(2) system call with the given directory or
// file descriptor, pathname, argument vector, environment and flags.
//
// An error is returned early if pathname, or any element of args or env,
// cannot be converted to a C string. Otherwise the raw errno from the system
// call is returned as-is; per execveat(2) the call only returns on failure.
func Execveat(fd uintptr, pathname string, args []string, env []string, flags int) error {
	pathPtr, err := syscall.BytePtrFromString(pathname)
	if err != nil {
		return err
	}

	argvPtrs, err := syscall.SlicePtrFromStrings(args)
	if err != nil {
		return err
	}

	envPtrs, err := syscall.SlicePtrFromStrings(env)
	if err != nil {
		return err
	}

	// The unsafe.Pointer-to-uintptr conversions are written directly in the
	// call's argument list, as the unsafe package requires for syscalls.
	_, _, errno := syscall.Syscall6(
		unix.SYS_EXECVEAT,
		fd,
		uintptr(unsafe.Pointer(pathPtr)),
		uintptr(unsafe.Pointer(&argvPtrs[0])),
		uintptr(unsafe.Pointer(&envPtrs[0])),
		uintptr(flags),
		0,
	)

	return errno
}

// Fexecve executes the program referred to by the open file descriptor fd,
// passing args as its argument vector and env as its environment. It is
// implemented as Execveat with an empty pathname and AT_EMPTY_PATH.
//
// The call is retried for as long as it fails with EINTR. Any other failure
// is returned wrapped in an *os.SyscallError naming the "execveat" call; the
// underlying errno stays reachable through errors.Is / errors.As.
func Fexecve(fd uintptr, args []string, env []string) error {
	for {
		err := Execveat(fd, "", args, env, unix.AT_EMPTY_PATH)
		if err != unix.EINTR { //nolint:errorlint // unix errors are bare
			return os.NewSyscallError("execveat", err)
		}
	}
}

func SetParentDeathSignal(sig uintptr) error {
if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil {
return err
Expand Down

0 comments on commit ada7109

Please sign in to comment.