-
Notifications
You must be signed in to change notification settings - Fork 40
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
The high level goal of this crate is to be an opinionated generic storage layer using composefs, with direct support for OCI. Note not just OCI *containers* but also including OCI artifacts too. This crate is intended to be the successor to the "storage core" of both ostree and containers/storage. Signed-off-by: Colin Walters <walters@verbum.org>
- Loading branch information
Showing
8 changed files
with
1,015 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
[package] | ||
name = "composefs-oci" | ||
version = "0.1.0" | ||
edition = "2021" | ||
|
||
[dependencies] | ||
anyhow = "1.0" | ||
bincode = { version = "1.3.3" } | ||
containers-image-proxy = "0.6" | ||
composefs = { path = "../composefs", features = ["v1_0_4"] } | ||
cap-std-ext = "4.0" | ||
camino = "1" | ||
clap = { version= "4.2", features = ["derive"] } | ||
fn-error-context = "0.2.0" | ||
ocidir = "0.2" | ||
rustix = { version = "0.38.34", features = ["fs"] } | ||
libc = "0.2" | ||
serde = "1" | ||
tar = "0.4.38" | ||
tokio = { features = ["io-std", "time", "process", "rt", "net"], version = ">= 1.13.0" } | ||
tokio-util = { features = ["io-util"], version = "0.7" } | ||
tokio-stream = { features = ["sync"], version = "0.1.8" } | ||
hex = "0.4.3" | ||
serde_json = "1.0.117" | ||
|
||
[lints] | ||
workspace = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
# composefs-oci | ||
|
||
The high level goal of this crate is to be an opinionated | ||
generic storage layer using composefs, with direct support | ||
for OCI. Note that this covers not just OCI *containers* but also | ||
OCI artifacts. | ||
|
||
This crate is intended to be the successor to | ||
the "storage core" of both ostree and containers/storage. | ||
|
||
## Design | ||
|
||
The composefs core just offers the primitive of creating | ||
"superblocks" which can have regular file data point | ||
to underlying "loose" objects stored in an arbitrary place. | ||
|
||
cfs-oci (for short) roughly matches the goal of both | ||
ostree and containers/storage in supporting multiple | ||
versioned filesystem trees with associated metadata, | ||
including support for e.g. garbage collection. | ||
|
||
### Layout | ||
|
||
By default, a cfs-ocidir augments an [OCI image layout](https://github.com/opencontainers/image-spec/blob/main/image-layout.md). | ||
|
||
However, media types of `application/vnd.oci.image.layer.v1.tar` may optionally be stored | ||
in a way that they can be natively mounted via composefs. This storage can be | ||
*additional* (which means storage cost is effectively the compressed size, plus uncompressed size) | ||
or an image can be "consumed" which means the compressed version is discarded. | ||
The tradeoff with this is that it is in general *not* possible to bit-for-bit | ||
reproduce the compressed blob again. | ||
|
||
#### Composefs ready layout | ||
|
||
cfs-ocidir augments the OCI image layout with a new `cfs/` directory. | ||
|
||
##### "split-checksum" format | ||
|
||
Side note: This follows a longstanding tradition of splitting up a checksum into (first two bytes, remaining bytes) | ||
creating subdirectories for the first two bytes. It is used by composefs by default. | ||
|
||
A cfs-ocidir has the following subdirectories: | ||
|
||
##### layers/ | ||
|
||
This has "split-checksum" entries of the form `<diffid>.cfs` which are a composefs corresponding to the given diffid (tar layer). | ||
Each file MAY have xattrs of the form `user.cfs.compressed` which include the original compressed digest. | ||
|
||
##### objects/ | ||
|
||
A composefs objects directory containing regular files, all of mode 0 (when run as root) or 0400 (when run as an unprivileged user) | ||
|
||
##### manifests | ||
|
||
This plays a role similar to the `manifests` array in https://github.com/opencontainers/image-spec/blob/main/image-index.md | ||
|
||
This is also an object directory using the `sha256:` of the manifest digest. | ||
|
||
Each entry is a manifest (JSON). It is also recommended to make this a hardlink into the objects/ directory to enable sharing across cfs-oci directories. | ||
|
||
It is possible that the manifest has a native annotation `composefs.rootfs.digest` which is the composefs digest of the flattened/merged root. This is called a "composefs-enabled" manifest, which allows a signature that covers the manifest | ||
to also cover the composefs digest and allow efficient verification of the root filesystem for the image. | ||
|
||
If the manifest does not have that annotation, then the composefs digest is stored as an extended attribute `user.composefs.rootfs.digest`. | ||
|
||
That composefs digest can be used to look up the actual composefs superblock for the rootfs in the objects/ directory. | ||
|
||
## CLI sketch: OCI container images | ||
|
||
`cfs-oci --repo=/path/to/repo image list|pull|rm|mount` | ||
|
||
## CLI sketch: OCI artifacts | ||
|
||
`cfs-oci --repo=/path/to/repo artifact list|pull|rm` | ||
|
||
## CLI sketch: Other | ||
|
||
### Efficiently clone a repo | ||
|
||
`cfs-oci clone /path/to/repo /path/to/clone` | ||
This would use reflinks (if available) or hardlinks if not | ||
for all the loose objects, but allow fully distinct namespacing/ownership | ||
of images. | ||
|
||
For example, it would probably make sense to have | ||
bootc and podman use separate physical stores in | ||
`/ostree` and `/var/lib/containers` - but if they're | ||
on the same filesystem, we can efficiently and safely share | ||
backing objects! | ||
|
||
### Injecting "flattened" composefs digests | ||
|
||
Another verb that should be supported here is: | ||
`cfs-oci --repo=/path/to/repo image finalize <imagename>` | ||
|
||
This would compute the *flattened* final filesystem tree | ||
for the container image, and inject its metadata into | ||
the manifest as an annotation e.g. `containers.composefs.digest`. | ||
|
||
Then, a signature which covers the manifest such as Sigstore | ||
can also cover verification of the filesystem tree. Of course, | ||
one could use any signature scheme desired to sign images. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,168 @@ | ||
use std::io; | ||
use std::path::Path; | ||
|
||
use anyhow::Result; | ||
use cap_std_ext::{ | ||
cap_std::fs::{ | ||
DirBuilder, DirBuilderExt as _, OpenOptions, OpenOptionsExt as _, Permissions, | ||
PermissionsExt as _, | ||
}, | ||
cap_tempfile::TempFile, | ||
}; | ||
use rustix::{ | ||
fd::{AsFd, AsRawFd, BorrowedFd, OwnedFd}, | ||
fs::openat, | ||
}; | ||
|
||
/// The default permissions set for directories; we assume | ||
/// nothing else should be accessing this content. If you want | ||
/// that, you can chmod() after, or use ACLs. | ||
pub(crate) fn rwx_perms() -> Permissions { | ||
Permissions::from_mode(0o700) | ||
} | ||
/// The default permissions for regular files. Ditto per above. | ||
pub(crate) fn r_perms() -> Permissions { | ||
Permissions::from_mode(0o400) | ||
} | ||
|
||
pub(crate) fn default_dirbuilder() -> DirBuilder { | ||
let mut builder = DirBuilder::new(); | ||
builder.mode(rwx_perms().mode()); | ||
builder | ||
} | ||
|
||
/// For creating a file with the default permissions | ||
pub(crate) fn default_file_create_options() -> OpenOptions { | ||
let mut r = OpenOptions::new(); | ||
r.create(true); | ||
r.mode(r_perms().mode()); | ||
r | ||
} | ||
|
||
/// Given a string, verify it is a single component of a path; it must | ||
/// not contain `/`. | ||
pub(crate) fn validate_single_path_component(s: &str) -> Result<()> { | ||
anyhow::ensure!(!s.contains('/')); | ||
Ok(()) | ||
} | ||
|
||
/// Return the parent of `p`, treating an empty parent as "no parent".
///
/// `Path::parent` yields `Some("")` for a bare relative name such as `foo`;
/// this helper maps that case to `None` so callers only ever see a parent
/// that actually names something.
pub(crate) fn parent_nonempty(p: &Path) -> Option<&Path> {
    match p.parent() {
        Some(parent) if !parent.as_os_str().is_empty() => Some(parent),
        _ => None,
    }
}
|
||
/// Strip a leading `/` from `path` so the result is not absolute and can be
/// passed to cap-std APIs. A path without a leading `/` is returned unchanged.
///
/// This makes no attempt to prevent directory escapes such as `../`; that is
/// assumed to be handled by a higher level function.
pub(crate) fn ensure_relative_path(path: &Path) -> &Path {
    match path.strip_prefix("/") {
        Ok(relative) => relative,
        Err(_) => path,
    }
}
|
||
/// Operates on a generic openat fd | ||
pub(crate) fn ensure_dir(fd: BorrowedFd, p: &Path) -> io::Result<bool> { | ||
use rustix::fs::AtFlags; | ||
let mode = rwx_perms().mode(); | ||
match rustix::fs::mkdirat(fd, p, rustix::fs::Mode::from_raw_mode(mode)) { | ||
Ok(()) => Ok(true), | ||
Err(e) if e.kind() == std::io::ErrorKind::AlreadyExists => { | ||
let st = rustix::fs::statat(fd, p, AtFlags::SYMLINK_NOFOLLOW)?; | ||
if !(st.st_mode & libc::S_IFDIR > 0) { | ||
// TODO use https://doc.rust-lang.org/std/io/enum.ErrorKind.html#variant.NotADirectory | ||
// once it's stable. | ||
return Err(io::Error::new(io::ErrorKind::Other, "Found non-directory")); | ||
} | ||
Ok(false) | ||
} | ||
// If we got ENOENT, then loop again, but create the parents | ||
Err(e) => Err(e.into()), | ||
} | ||
} | ||
|
||
/// The cap-std default does not use RESOLVE_IN_ROOT; this does. | ||
/// Additionally for good measure we use NO_MAGICLINKS and NO_XDEV. | ||
/// We never expect to encounter a mounted /proc in our use cases nor | ||
/// any other mountpoints at all really, but still. | ||
pub(crate) fn openat_rooted( | ||
dirfd: BorrowedFd, | ||
path: impl AsRef<Path>, | ||
) -> rustix::io::Result<OwnedFd> { | ||
use rustix::fs::{OFlags, ResolveFlags}; | ||
rustix::fs::openat2( | ||
dirfd, | ||
path.as_ref(), | ||
OFlags::NOFOLLOW | OFlags::CLOEXEC | OFlags::PATH, | ||
rustix::fs::Mode::empty(), | ||
ResolveFlags::IN_ROOT | ResolveFlags::NO_MAGICLINKS | ResolveFlags::NO_XDEV, | ||
) | ||
} | ||
|
||
/// Not all operations can be performed on an O_PATH directory; e.g. | ||
/// fsetxattr() can't. | ||
pub fn fsetxattr<Fd: AsFd>( | ||
fd: Fd, | ||
name: &str, | ||
value: &[u8], | ||
flags: rustix::fs::XattrFlags, | ||
) -> rustix::io::Result<()> { | ||
let path = format!("/proc/self/fd/{}", fd.as_fd().as_raw_fd()); | ||
rustix::fs::setxattr(&path, name, value, flags) | ||
} | ||
|
||
/// Manual implementation of recursive dir walking using openat2 | ||
pub(crate) fn ensure_dir_recursive(fd: BorrowedFd, p: &Path, init: bool) -> io::Result<bool> { | ||
// Optimize the initial case by skipping the recursive calls; | ||
// we just call mkdirat() and no-op if we get EEXIST | ||
if !init { | ||
if let Some(parent) = parent_nonempty(p) { | ||
ensure_dir_recursive(fd, parent, false)?; | ||
} | ||
} | ||
match ensure_dir(fd, p) { | ||
Ok(b) => Ok(b), | ||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => ensure_dir_recursive(fd, p, false), | ||
Err(e) => Err(e), | ||
} | ||
} | ||
|
||
/// Given a cap-std tmpfile, reopen its file in read-only mode. This is | ||
/// needed for fsverity support. | ||
pub(crate) fn reopen_tmpfile_ro(tf: &mut TempFile) -> std::io::Result<()> { | ||
let procpath = format!("/proc/self/fd/{}", tf.as_file().as_fd().as_raw_fd()); | ||
let tf_ro = cap_std_ext::cap_std::fs::File::open_ambient( | ||
procpath, | ||
cap_std_ext::cap_std::ambient_authority(), | ||
)?; | ||
let tf = tf.as_file_mut(); | ||
*tf = tf_ro; | ||
Ok(()) | ||
} | ||
|
||
// pub(crate) fn normalize_path(path: &Utf8Path) -> Result<Utf8PathBuf> { | ||
// let mut components = path.components().peekable(); | ||
// let r = if !matches!(components.peek(), Some(camino::Utf8Component::RootDir)) { | ||
// [camino::Utf8Component::RootDir] | ||
// .into_iter() | ||
// .chain(components) | ||
// .collect() | ||
// } else { | ||
// components.collect() | ||
// }; | ||
// Ok(r) | ||
// } | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;

    /// `ensure_relative_path` strips a leading `/` and leaves everything
    /// else untouched.
    #[test]
    fn test_relpath() {
        // With or without a leading slash, the result is the relative form.
        let expected = Path::new("foo/bar");
        for input in ["foo/bar", "/foo/bar"] {
            assert_eq!(ensure_relative_path(Path::new(input)), expected);
        }
        // Paths that are already relative via `./` come back unchanged.
        for unchanged in ["./foo/bar", "./foo", "./"] {
            let p = Path::new(unchanged);
            assert_eq!(ensure_relative_path(p), p);
        }
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
use std::ffi::OsString; | ||
|
||
use anyhow::{Context, Result}; | ||
use camino::Utf8PathBuf; | ||
use clap::Parser; | ||
use ocidir::cap_std::{self, fs::Dir}; | ||
use pull::cli_pull; | ||
|
||
mod fileutils; | ||
pub mod pull; | ||
pub mod repo; | ||
|
||
/// Options for specifying the repository | ||
#[derive(Debug, Parser)] | ||
pub(crate) struct RepoOpts { | ||
/// Path to the repository | ||
#[clap(long, value_parser)] | ||
repo: Utf8PathBuf, | ||
} | ||
|
||
impl RepoOpts { | ||
pub(crate) fn open(&self) -> Result<crate::repo::Repo> { | ||
let repo = self.repo.as_path(); | ||
let d = Dir::open_ambient_dir(repo, cap_std::ambient_authority()) | ||
.with_context(|| format!("Opening {repo}"))?; | ||
crate::repo::Repo::open(d) | ||
} | ||
} | ||
|
||
/// Options for importing a tar archive. | ||
#[derive(Debug, Parser)] | ||
pub(crate) struct PullOpts { | ||
#[clap(flatten)] | ||
repo_opts: RepoOpts, | ||
|
||
/// Image reference | ||
image: String, | ||
} | ||
|
||
/// Toplevel options | ||
#[derive(Debug, Parser)] | ||
#[clap(name = "composefs")] | ||
#[clap(rename_all = "kebab-case")] | ||
#[allow(clippy::large_enum_variant)] | ||
pub(crate) enum Opt { | ||
/// Pull an image | ||
Pull(PullOpts), | ||
} | ||
|
||
/// Parse the provided arguments and execute. | ||
/// Calls [`clap::Error::exit`] on failure, printing the error message and aborting the program. | ||
pub async fn run_from_iter<I>(args: I) -> Result<()> | ||
where | ||
I: IntoIterator, | ||
I::Item: Into<OsString> + Clone, | ||
{ | ||
run_from_opt(Opt::parse_from(args)).await | ||
} | ||
|
||
async fn run_from_opt(opt: Opt) -> Result<()> { | ||
match opt { | ||
Opt::Pull(opts) => cli_pull(opts).await, | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
use anyhow::Result; | ||
use ocidir::cap_std::fs::Dir; | ||
|
||
use crate::PullOpts; | ||
|
||
/// Pull an opened image through the image proxy.
///
/// Not implemented yet: the body is `todo!()`, so calling this panics.
// NOTE(review): presumably this will fetch the content of `img` via `proxy`
// into the repository — confirm once implemented.
pub async fn pull( | ||
proxy: &containers_image_proxy::ImageProxy, | ||
img: &containers_image_proxy::OpenedImage, | ||
) -> Result<()> { | ||
todo!() | ||
} | ||
|
||
/// CLI entry point for the `pull` subcommand.
///
/// Opens the repository named in `opts`, starts an image proxy, opens the
/// requested image and fetches its manifest. The remainder is not implemented
/// yet: the function ends in `todo!()` and panics once the manifest has been
/// fetched. Errors from any of the earlier steps are propagated.
pub(crate) async fn cli_pull(opts: PullOpts) -> Result<()> { | ||
let repo = opts.repo_opts.open()?; | ||
let proxy = containers_image_proxy::ImageProxy::new().await?; | ||
let img = proxy.open_image(&opts.image).await?; | ||
// The fetched digest and manifest are not used yet.
let (manifest_digest, manifest) = proxy.fetch_manifest(&img).await?; | ||
|
||
todo!() | ||
} |
Oops, something went wrong.