Skip to content

Commit

Permalink
Merge pull request #98 from Furisto/rootless
Browse files Browse the repository at this point in the history
Experimental support for rootless containers
  • Loading branch information
utam0k authored Jun 19, 2021
2 parents 58e33bb + 4909c3b commit 1afde70
Show file tree
Hide file tree
Showing 11 changed files with 395 additions and 89 deletions.
4 changes: 2 additions & 2 deletions oci_spec/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -567,9 +567,9 @@ pub enum FreezerState {
#[derive(Serialize, Deserialize, Debug, Clone)]
#[serde(rename_all = "camelCase")]
pub struct Linux {
#[serde(default, rename = "LinuxIDMapping")]
#[serde(default)]
pub uid_mappings: Vec<LinuxIdMapping>,
#[serde(default, rename = "LinuxIDMapping")]
#[serde(default)]
pub gid_mappings: Vec<LinuxIdMapping>,
#[serde(default)]
pub sysctl: HashMap<String, String>,
Expand Down
29 changes: 19 additions & 10 deletions src/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,17 @@ use nix::sched;
use nix::unistd;
use nix::unistd::{Gid, Uid};

use crate::cgroups;
use crate::container::{Container, ContainerStatus};
use crate::namespaces::Namespaces;
use crate::notify_socket::NotifyListener;
use crate::process::{fork, Process};
use crate::rootfs;
use crate::rootless::{lookup_map_binaries, should_use_rootless, Rootless};
use crate::stdio::FileDescriptor;
use crate::tty;
use crate::utils;
use crate::{capabilities, command::Command};
use crate::{cgroups, rootless};

/// This is the main structure which stores various commandline options given by
/// high-level container runtime
Expand Down Expand Up @@ -131,19 +132,27 @@ fn run_container<P: AsRef<Path>>(
let linux = spec.linux.as_ref().unwrap();
let namespaces: Namespaces = linux.namespaces.clone().into();

let rootless = if should_use_rootless() {
log::debug!("rootless container should be created");
log::warn!(
"resource constraints and multi id mapping is unimplemented for rootless containers"
);
rootless::validate(&spec)?;
let mut rootless = Rootless::from(linux);
if let Some((uid_binary, gid_binary)) = lookup_map_binaries(linux)? {
rootless.newuidmap = Some(uid_binary);
rootless.newgidmap = Some(gid_binary);
}
Some(rootless)
} else {
None
};

let cgroups_path = utils::get_cgroup_path(&linux.cgroups_path, container.id());
let cmanager = cgroups::common::create_cgroup_manager(&cgroups_path)?;

// first fork, which creates process, which will later create actual container process
match fork::fork_first(
pid_file,
namespaces
.clone_flags
.contains(sched::CloneFlags::CLONE_NEWUSER),
linux,
&container,
cmanager,
)? {
match fork::fork_first(pid_file, rootless, linux, &container, cmanager)? {
// In the parent process, which called run_container
Process::Parent(parent) => Ok(Process::Parent(parent)),
// in child process
Expand Down
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ pub mod notify_socket;
pub mod pipe;
pub mod process;
pub mod rootfs;
pub mod rootless;
pub mod signal;
pub mod start;
pub mod stdio;
Expand Down
7 changes: 6 additions & 1 deletion src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use nix::sys::signal as nix_signal;
use youki::command::linux::LinuxCommand;
use youki::container::{Container, ContainerStatus};
use youki::create;
use youki::rootless::should_use_rootless;
use youki::signal;
use youki::start;

Expand Down Expand Up @@ -79,7 +80,11 @@ fn main() -> Result<()> {
eprintln!("log init failed: {:?}", e);
}

let root_path = PathBuf::from(&opts.root);
let root_path = if should_use_rootless() && opts.root.eq(&PathBuf::from("/run/youki")) {
PathBuf::from("/tmp/rootless")
} else {
PathBuf::from(&opts.root)
};
fs::create_dir_all(&root_path)?;

match opts.subcmd {
Expand Down
31 changes: 12 additions & 19 deletions src/process/child.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::io::ErrorKind;
use std::io::Read;
use std::io::Write;

use anyhow::{bail, Result};
use mio::unix::pipe;
Expand All @@ -9,6 +8,7 @@ use mio::unix::pipe::Sender;
use mio::{Events, Interest, Poll, Token};
use nix::unistd::Pid;

use super::parent::ParentChannel;
use super::{MAX_EVENTS, WAIT_FOR_INIT};
use crate::process::message::Message;

Expand All @@ -18,7 +18,7 @@ const CHILD: Token = Token(1);
/// Contains sending end of pipe for parent process, receiving end of pipe
/// for the init process and poller for that
pub struct ChildProcess {
sender_for_parent: Sender,
parent_channel: ParentChannel,
receiver: Option<Receiver>,
poll: Option<Poll>,
}
Expand All @@ -29,9 +29,9 @@ pub struct ChildProcess {
// a process point of view, init process is child of child process, which is child of original youki process.
impl ChildProcess {
/// create a new Child process structure
pub fn new(sender_for_parent: Sender) -> Result<Self> {
pub fn new(parent_channel: ParentChannel) -> Result<Self> {
Ok(Self {
sender_for_parent,
parent_channel,
receiver: None,
poll: None,
})
Expand All @@ -55,24 +55,17 @@ impl ChildProcess {

/// Indicate that child process has forked the init process to parent process
pub fn notify_parent(&mut self, init_pid: Pid) -> Result<()> {
log::debug!(
"child send to parent {:?}",
(Message::ChildReady as u8).to_be_bytes()
);
// write ChildReady message to the pipe to parent
self.write_message_for_parent(Message::ChildReady)?;
// write pid of init process which is forked by child process to the pipe,
// Pid in nix::unistd is type alias of SessionId which itself is alias of i32
self.sender_for_parent
.write_all(&(init_pid.as_raw()).to_be_bytes())?;
self.parent_channel.send_init_pid(init_pid)?;
Ok(())
}

/// Forwards an identifier-mapping request to the parent through
/// `parent_channel`, propagating any error the channel reports.
// NOTE(review): presumably this asks the parent to write the uid/gid maps of
// the child's new user namespace — confirm against `ParentChannel`.
pub fn request_identifier_mapping(&mut self) -> Result<()> {
self.parent_channel.request_identifier_mapping()?;
Ok(())
}

/// writes given message to pipe for the parent
#[inline]
fn write_message_for_parent(&mut self, msg: Message) -> Result<()> {
self.sender_for_parent
.write_all(&(msg as u8).to_be_bytes())?;
/// Delegates to `parent_channel.wait_for_mapping_ack()` and propagates any
/// error it returns.
// NOTE(review): presumably blocks until the parent acknowledges that the
// requested id mappings were written — confirm against `ParentChannel`.
pub fn wait_for_mapping_ack(&mut self) -> Result<()> {
self.parent_channel.wait_for_mapping_ack()?;
Ok(())
}

Expand Down
31 changes: 18 additions & 13 deletions src/process/fork.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,23 @@ use nix::unistd;
use nix::unistd::Pid;

use crate::cgroups::common::CgroupManager;
use crate::container::Container;
use crate::container::ContainerStatus;
use crate::process::{child, init, parent, Process};
use crate::{container::Container, pipe::Pipe};
use crate::rootless::Rootless;

/// Function to perform the first fork in order to run the container process
pub fn fork_first<P: AsRef<Path>>(
pid_file: Option<P>,
is_userns: bool,
rootless: Option<Rootless>,
linux: &oci_spec::Linux,
container: &Container,
cmanager: Box<dyn CgroupManager>,
) -> Result<Process> {
// create a new pipe
let cpipe = Pipe::new()?;

// create new parent process structure
let (mut parent, sender_for_parent) = parent::ParentProcess::new()?;
let (mut parent, parent_channel) = parent::ParentProcess::new(rootless.clone())?;
// create a new child process structure with sending end of parent process
let child = child::ChildProcess::new(sender_for_parent)?;
let mut child = child::ChildProcess::new(parent_channel)?;

// fork the process
match unsafe { unistd::fork()? } {
Expand All @@ -51,21 +49,28 @@ pub fn fork_first<P: AsRef<Path>>(
// if new user is specified in specification, this will be true
// and new namespace will be created, check https://man7.org/linux/man-pages/man7/user_namespaces.7.html
// for more information
if is_userns {
if rootless.is_some() {
log::debug!("creating new user namespace");
sched::unshare(sched::CloneFlags::CLONE_NEWUSER)?;

// child needs to be dumpable, otherwise the non root parent is not
// allowed to write the uid/gid maps
prctl::set_dumpable(true).unwrap();
child.request_identifier_mapping()?;
child.wait_for_mapping_ack()?;
prctl::set_dumpable(false).unwrap();
}

cpipe.notify()?;
Ok(Process::Child(child))
}
// in the parent process
unistd::ForkResult::Parent { child } => {
cpipe.wait()?;

// wait for child to fork init process and report back its pid
let init_pid = parent.wait_for_child_ready()?;
let init_pid = parent.wait_for_child_ready(child)?;
log::debug!("init pid is {:?}", init_pid);
cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?;
if rootless.is_none() && linux.resources.is_some() {
cmanager.apply(&linux.resources.as_ref().unwrap(), Pid::from_raw(init_pid))?;
}

// update status and pid of the container process
container
Expand Down
4 changes: 4 additions & 0 deletions src/process/message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,17 @@
// Message tags with explicit discriminant values; the same byte values are
// accepted by the `From<u8>` implementation for this enum.
pub enum Message {
// NOTE(review): presumably signals that the child process is ready (it has
// forked the init process) — confirm against the sender.
ChildReady = 0x00,
// NOTE(review): presumably signals that the init process is ready — confirm.
InitReady = 0x01,
// NOTE(review): appears to be the request to write uid/gid mappings for a
// rootless container — confirm against the parent channel.
WriteMapping = 0x02,
// NOTE(review): appears to be the acknowledgement that the mappings were
// written — confirm against the parent channel.
MappingWritten = 0x03,
}

impl From<u8> for Message {
/// Decodes a raw tag byte into the matching [`Message`] variant.
///
/// # Panics
///
/// Panics with `"unknown message."` when the byte matches no variant.
fn from(from: u8) -> Self {
    // Take the tag bytes from the enum's own discriminants so the patterns
    // below mirror the declaration instead of repeating the literals.
    const CHILD_READY: u8 = Message::ChildReady as u8;
    const INIT_READY: u8 = Message::InitReady as u8;
    const WRITE_MAPPING: u8 = Message::WriteMapping as u8;
    const MAPPING_WRITTEN: u8 = Message::MappingWritten as u8;

    match from {
        CHILD_READY => Message::ChildReady,
        INIT_READY => Message::InitReady,
        WRITE_MAPPING => Message::WriteMapping,
        MAPPING_WRITTEN => Message::MappingWritten,
        _ => panic!("unknown message."),
    }
}
Expand Down
2 changes: 2 additions & 0 deletions src/process/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,5 @@ const MAX_EVENTS: usize = 128;
const WAIT_FOR_CHILD: Duration = Duration::from_secs(5);
/// Time to wait when polling for message from init process
const WAIT_FOR_INIT: Duration = Duration::from_millis(1000);
/// Time to wait when polling for mapping ack from parent
const WAIT_FOR_MAPPING: Duration = Duration::from_secs(3);
Loading

0 comments on commit 1afde70

Please sign in to comment.