From 684d96687fdbc8c81456e8f8e8d3d7f1b774cf26 Mon Sep 17 00:00:00 2001 From: Kir Kolyshkin Date: Wed, 15 Apr 2020 18:37:18 -0700 Subject: [PATCH] cgroup: check systemd unit creation While playing with Fedora 31 host with old/broken selinux packages, I found out that systemd fails to create a transient unit. Here is an except from journalctl: > audit[1]: AVC avc: denied { setsched } for pid=1 comm="systemd" scontext=system_u:system_r:init_t:s0 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=process permissive=0 > systemd[1]: crun-555.scope: Failed to add PIDs to scope's control group: Permission denied > systemd[1]: crun-555.scope: Failed with result 'resources'. > systemd[1]: Failed to start libcrun container. and yet crun did not show any error and proceeded to start the container. Moreover, since the cgroup was not created by systemd, but the error was not detected, the container process was put into the wrong cgroup (the same as the `crun` caller). Finally, since crun gets the cgroup name from /proc/$PID/cgroup (where $PID is container process PID), it proceeded to set the limits for that (wrong) cgroup: > # cat /sys/fs/cgroup/system.slice/sshd.service/memory.max > 536870912 Finally, `crun delete` apparently removes the `system.slice/sshd.service` cgroup :( The primary cause is the missing check that the transient unit has been created. This is what this patch adds (similar to how it's done in cgroup-run code). After this patch: > # ../crun --systemd-cgroup run -d 555 > 2020-04-16T14:47:34.000354150Z: Systemd unit crun-555.scope failed: failed. See "journalctl -xe" for details. > 2020-04-16T14:47:34.000355064Z: Systemd unit crun-555.scope failed: failed. See "journalctl -xe" for details. For more background on how the issue was found, steps to repro etc please see a similar (but much less brutal -- it just fails to start the container) issue in runc: - https://github.com/opencontainers/runc/issues/2313 Signed-off-by: Kir Kolyshkin --- libocispec | 2 +- src/libcrun/cgroup.c | 31 ++++++++++++++++++++++--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/libocispec b/libocispec index ed8e5fea5c..2ca74f1b98 160000 --- a/libocispec +++ b/libocispec @@ -1 +1 @@ -Subproject commit ed8e5fea5c4a773123bcb9a6c02cb7e1fdf24c1a +Subproject commit 2ca74f1b98b5465f5a6455723d6a98647dd5d5c0 diff --git a/src/libcrun/cgroup.c b/src/libcrun/cgroup.c index ade8b20a13..e5d1927300 100644 --- a/src/libcrun/cgroup.c +++ b/src/libcrun/cgroup.c @@ -721,6 +721,8 @@ struct systemd_job_removed_s { const char *path; int *terminated; + char *unit; + char *result; }; static int @@ -736,7 +738,11 @@ systemd_job_removed (sd_bus_message *m, void *userdata, sd_bus_error *error arg_ return -1; if (strcmp (p->path, path) == 0) - *p->terminated = 1; + { + *p->terminated = 1; + p->unit = xstrdup(unit); + p->result = xstrdup(result); + } return 0; } @@ -922,14 +928,13 @@ int enter_systemd_cgroup_scope (runtime_spec_schema_config_linux_resources *reso } } - sd_err = sd_bus_add_match (bus, - NULL, - "type='signal'," - "sender='org.freedesktop.systemd1'," - "interface='org.freedesktop.systemd1.Manager'," - "member='JobRemoved'," - "path='/org/freedesktop/systemd1'", - systemd_job_removed, &userdata); + sd_err = sd_bus_match_signal (bus, + NULL, + "org.freedesktop.systemd1", + "/org/freedesktop/systemd1", + "org.freedesktop.systemd1.Manager", + "JobRemoved", + systemd_job_removed, &userdata); if (UNLIKELY (sd_err < 0)) { ret = crun_make_error (err, -sd_err, "sd-bus add match"); @@ -1073,6 +1078,14 @@ int enter_systemd_cgroup_scope (runtime_spec_schema_config_linux_resources *reso } } + if (userdata.unit && userdata.result) + { + if (strcmp(userdata.result, "done") != 0) + { + ret = crun_make_error (err, 0, "Systemd unit %s failed: %s. See \"journalctl -xe\" for details.", userdata.unit, userdata.result); + } + } + exit: if (bus) sd_bus_unref (bus);