Skip to content

Commit

Permalink
[antlir2][rootless] support booted containers in rootless mode
Browse files Browse the repository at this point in the history
Summary:
Instead of forcing the use of `systemd-nspawn`, allow for booting `systemd` as
init inside unprivileged containers.

This enables fully rootless booted image tests that do not require `sudo` for
the image build or the actual invocation.

Rooted builds continue to use `systemd-nspawn`.

Test Plan:
```
❯ buck2 test fbcode//antlir/antlir2/testing/tests/...
Buck UI: https://www.internalfb.com/buck2/0629229a-7e9f-48b2-bcce-cb427a561ec5
Test UI: https://www.internalfb.com/intern/testinfra/testrun/6192449698232658
Tests finished: Pass 402. Fail 0. Fatal 0. Skip 0. Build failure 0
```

Reviewed By: epilatow

Differential Revision: D68917919

fbshipit-source-id: 95a7c58bdea79d566083aec61901b2939b9c8d28
  • Loading branch information
vmagro authored and facebook-github-bot committed Jan 31, 2025
1 parent d54f493 commit a773152
Show file tree
Hide file tree
Showing 7 changed files with 53 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,19 @@ pub(crate) fn setup_isolation(isol: &IsolationContext) -> Result<()> {
tmpfs_overlay,
hostname,
readonly,
// isolate_unshare crate already ensures that these are not configured
invocation_type: _,
register: _,
enable_network,
invocation_type,
// isolate_unshare crate already ensures that this is not set
register: _,
} = isol;

let mut clone_flags = CloneFlags::CLONE_NEWNS | CloneFlags::CLONE_NEWUTS;
if !enable_network {
clone_flags |= CloneFlags::CLONE_NEWNET;
}
if invocation_type.booted() {
clone_flags |= CloneFlags::CLONE_NEWCGROUP;
}

unshare(clone_flags).context("while unsharing into new namespaces")?;

Expand Down Expand Up @@ -179,11 +182,16 @@ pub(crate) fn setup_isolation(isol: &IsolationContext) -> Result<()> {
.open_dir(tmpfs.strip_abs())
.with_context(|| format!("while opening tmpfs '{}'", tmpfs.display()))?;
if dev {
tmpfs
.symlink_contents("/proc/self/fd", "fd")
.context("while creating /dev/fd symlink")?;
// when booted, systemd will create /dev/fd
if !invocation_type.booted() {
tmpfs
.symlink_contents("/proc/self/fd", "fd")
.context("while creating /dev/fd symlink")?;
}

for devname in ["fuse", "null", "random", "urandom"] {
for devname in [
"fuse", "null", "zero", "full", "random", "urandom", "tty", "ptmx",
] {
let dev = tmpfs
.create(devname)
.with_context(|| format!("while creating device file '{devname}'"))?;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ fn do_main(args: Main) -> Result<()> {
// performed by the first forked process (pid 1) in that namespace
unshare(CloneFlags::CLONE_NEWPID).context("while unsharing into new pid namespace")?;
let mut pid1 = Command::new(std::env::current_exe().context("while getting current exe")?);
pid1.arg("pid1")
.arg(
serde_json::to_string(args.isolation.as_inner())
.context("while serializing isolation info")?,
)
.arg(args.program)
.arg("--")
.args(args.program_args);
pid1.arg("pid1").arg(
serde_json::to_string(args.isolation.as_inner())
.context("while serializing isolation info")?,
);
if args.isolation.as_inner().invocation_type.booted() {
pid1.arg("--exec-init");
}
pid1.arg(args.program).arg("--").args(args.program_args);
let mut pid1 = pid1.spawn().context("while spawning pid1")?;
let status = pid1.wait().context("while waiting for pid1")?;
if status.success() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
*/

use std::ffi::OsString;
use std::os::unix::process::CommandExt;

use anyhow::ensure;
use anyhow::Context;
use anyhow::Error;
use anyhow::Result;
use clap::Parser;
use isolate_cfg::IsolationContext;
Expand All @@ -26,6 +28,10 @@ use crate::isolation;
#[derive(Parser, Debug)]
pub(crate) struct Pid1Args {
isolation: Json<IsolationContext<'static>>,
#[clap(long)]
/// Treat PROGRAM and PROGRAM_ARGS as an init application that should be
/// 'exec'ed after setting up the antlir container isolation
exec_init: bool,
program: OsString,
#[clap(last = true)]
program_args: Vec<OsString>,
Expand Down Expand Up @@ -63,6 +69,11 @@ async fn pid1_async(args: Pid1Args) -> Result<()> {
pid2.env(key, val);
}
pid2.args(args.program_args);

if args.exec_init {
return Err(Error::from(pid2.as_std_mut().exec()).context("failed to 'exec' init process"));
}

let mut pid2 = pid2.spawn().context("while spawning pid2")?;
// I call this pid2, but it might not actually be 2, so grab it now
let pid2_id = Pid::from_raw(pid2.id().context("while getting pid2 pid")? as i32);
Expand Down
12 changes: 6 additions & 6 deletions antlir/antlir2/antlir2_isolate/isolate_unshare/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ pub struct IsolatedContext<'a>(IsolationContext<'a>);

impl<'a> IsolatedContext<'a> {
pub fn command<S: AsRef<OsStr>>(&self, program: S) -> Result<Command> {
// TODO: remove these settings entirely when we get rid of
// systemd-nspawn / move the things that require this (like image_test)
// to *only* use systemd-nspawn
if self.0.invocation_type != InvocationType::Pid2Pipe {
return Err(Error::UnsupportedSetting("invocation_type"));
}
if self.0.register {
return Err(Error::UnsupportedSetting("register"));
}
// TODO: support this when we can bind the controlling terminal to
// /dev/console, otherwise don't lie about providing an interactive
// console
if self.0.invocation_type == InvocationType::BootInteractive {
return Err(Error::UnsupportedSetting("invocation_type=BootInteractive"));
}

let mut cmd = Command::new(
buck_resources::get("antlir/antlir2/antlir2_isolate/isolate_unshare/preexec")
Expand Down
7 changes: 6 additions & 1 deletion antlir/antlir2/testing/image_test/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,15 @@ fn main() -> Result<()> {

let args = Args::parse();

match args {
if let Err(e) = match args {
Args::Spawn(a) => a.run(),
Args::Exec(a) => a.run(),
Args::ShellHelp(a) => a.run(),
Args::Container(a) => a.run(),
} {
eprintln!("{e:#}");
Err(e)
} else {
Ok(())
}
}
17 changes: 7 additions & 10 deletions antlir/antlir2/testing/image_test/src/spawn_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,6 @@ pub(crate) fn run(

match spec.boot {
Some(boot) => {
ensure!(
!spec.rootless,
"TODO(T187078382): booted tests still must use systemd-nspawn and are incompatible with rootless"
);

let container_stdout = container_stdout_file()?;
let (mut test_stdout, mut test_stderr) = make_log_files("test")?;

Expand Down Expand Up @@ -226,12 +221,14 @@ pub(crate) fn run(
exec_spec_file.path(),
));

// Register the test container with systemd-machined so manual debugging
// is easier.
ctx.register(true);
let mut isol = if spec.rootless {
let mut isol = unshare(ctx.build())?.command("/sbin/init")?;
isol.arg("systemd.unit=antlir2_image_test.service");
isol
} else {
nspawn(ctx.build())?.command("systemd.unit=antlir2_image_test.service")?
};

let mut isol =
nspawn(ctx.build())?.command("systemd.unit=antlir2_image_test.service")?;
isol.arg("systemd.journald.forward_to_console=1")
.arg("systemd.log_time=1")
.arg("systemd.setenv=ANTLIR2_IMAGE_TEST=1");
Expand Down
4 changes: 0 additions & 4 deletions antlir/antlir2/testing/tests/test.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ def test_variants(
oss = ("centos9",),
),
):
if rootless and boot:
# TODO(T187078382): booted tests still must use
# systemd-nspawn and are incompatible with rootless
continue
name_parts = (
"test",
lang,
Expand Down

0 comments on commit a773152

Please sign in to comment.