diff --git a/omnibor/src/artifact_id.rs b/omnibor/src/artifact_id.rs index 73b9028..7a007ba 100644 --- a/omnibor/src/artifact_id.rs +++ b/omnibor/src/artifact_id.rs @@ -1,5 +1,7 @@ use crate::Error; use crate::Result; +#[cfg(doc)] +use crate::Sha256; use crate::SupportedHash; use gitoid::Blob; use gitoid::GitOid; @@ -31,6 +33,11 @@ pub struct ArtifactId { impl ArtifactId { /// Construct an [`ArtifactId`] from an existing [`GitOid`]. /// + /// This produces an identifier using the provided [`GitOid`] directly, + /// without additional validation. The type system ensures the [`GitOid`] + /// hash algorithm is one supported for an [`ArtifactId`], and that the + /// object type is [`gitoid::Blob`]. + /// /// # Example /// /// ```rust @@ -38,44 +45,71 @@ impl ArtifactId { /// # use omnibor::Sha256; /// # use gitoid::GitOid; /// let gitoid = GitOid::from_str("hello, world"); - /// let id: ArtifactId = ArtifactId::from_gitoid(gitoid); + /// let id: ArtifactId = ArtifactId::id_gitoid(gitoid); /// println!("Artifact ID: {}", id); /// ``` - pub fn from_gitoid(gitoid: GitOid) -> ArtifactId { + pub fn id_gitoid(gitoid: GitOid) -> ArtifactId { ArtifactId { gitoid } } /// Construct an [`ArtifactId`] from raw bytes. /// + /// This hashes the bytes to produce an identifier. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_bytes(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); + /// let id: ArtifactId = ArtifactId::id_bytes(&[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]); /// println!("Artifact ID: {}", id); /// ``` - pub fn from_bytes>(content: B) -> ArtifactId { - ArtifactId::from_gitoid(GitOid::from_bytes(content)) + pub fn id_bytes>(content: B) -> ArtifactId { + ArtifactId::id_gitoid(GitOid::from_bytes(content)) } /// Construct an [`ArtifactId`] from a string. /// + /// This hashes the contents of the string to produce an identifier. 
+ /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID: {}", id); /// ``` - #[allow(clippy::should_implement_trait)] - pub fn from_str>(s: S) -> ArtifactId { - ArtifactId::from_gitoid(GitOid::from_str(s)) + pub fn id_str>(s: S) -> ArtifactId { + ArtifactId::id_gitoid(GitOid::from_str(s)) } /// Construct an [`ArtifactId`] from a synchronous reader. /// + /// This reads the content of the reader and hashes it to produce an identifier. + /// + /// Note that this will figure out the expected size in bytes of the content + /// being read by seeking to the end of the content and then back to wherever the + /// reading initially started. This is to enable a correctness check where the total + /// number of bytes hashed is checked against the expected length. If they do not + /// match, we return an [`Error`] rather than proceeding with a potentially-invalid + /// identifier. + /// + /// If you don't want this seeking to occur, you can use + /// [`ArtifactId::id_reader_with_length`] instead, which takes an explicit expected + /// length and checks against _that_ value, rather than inferring an expected length. + /// + /// Also note that this doesn't reset the reader to the beginning of its region; if + /// you provide a reader which has already read some portion of an underlying file or + /// has seeked to a point that's not the beginning, this function will continue reading + /// from that point, and the resulting hash will _not_ encompass the contents of the + /// entire file. You can use [`ArtifactId::id_reader_with_length`] and provide the + /// expected length of the full file in bytes to defend against this "partial hash" + /// error. + /// + /// Reads are buffered internally to reduce the number of syscalls and context switches + /// between the kernel and user code. 
+ /// /// # Example /// /// ```rust @@ -83,16 +117,33 @@ impl ArtifactId { /// # use omnibor::Sha256; /// # use std::fs::File; /// let file = File::open("test/data/hello_world.txt").unwrap(); - /// let id: ArtifactId = ArtifactId::from_reader(&file).unwrap(); + /// let id: ArtifactId = ArtifactId::id_reader(&file).unwrap(); /// println!("Artifact ID: {}", id); /// ``` - pub fn from_reader(reader: R) -> Result> { + pub fn id_reader(reader: R) -> Result> { let gitoid = GitOid::from_reader(reader)?; - Ok(ArtifactId::from_gitoid(gitoid)) + Ok(ArtifactId::id_gitoid(gitoid)) } /// Construct an [`ArtifactId`] from a synchronous reader with an expected length. /// + /// This reads the content of the reader and hashes it to produce an identifier. + /// + /// This uses the `expected_length` to enable a correctness check where the total + /// number of bytes hashed is checked against the expected length. If they do not + /// match, we return an [`Error`] rather than proceeding with a potentially-invalid + /// identifier. + /// + /// Also note that this doesn't reset the reader to the beginning of its region; if + /// you provide a reader which has already read some portion of an underlying file or + /// has seeked to a point that's not the beginning, this function will continue reading + /// from that point, and the resulting hash will _not_ encompass the contents of the + /// entire file. Make sure to provide the expected number of bytes for the full file + /// to protect against this error. + /// + /// Reads are buffered internally to reduce the number of syscalls and context switches + /// between the kernel and user code.
+ /// /// # Example /// /// ```rust @@ -100,19 +151,46 @@ impl ArtifactId { /// # use omnibor::Sha256; /// # use std::fs::File; /// let file = File::open("test/data/hello_world.txt").unwrap(); - /// let id: ArtifactId = ArtifactId::from_reader_with_length(&file, 11).unwrap(); + /// let id: ArtifactId = ArtifactId::id_reader_with_length(&file, 11).unwrap(); /// println!("Artifact ID: {}", id); /// ``` - pub fn from_reader_with_length( + pub fn id_reader_with_length( reader: R, expected_length: usize, ) -> Result> { let gitoid = GitOid::from_reader_with_length(reader, expected_length)?; - Ok(ArtifactId::from_gitoid(gitoid)) + Ok(ArtifactId::id_gitoid(gitoid)) } /// Construct an [`ArtifactId`] from an asynchronous reader. /// + /// This reads the content of the reader and hashes it to produce an identifier. + /// + /// Reading is done asynchronously by the Tokio runtime. The specifics of how this + /// is done are based on the configuration of the runtime. + /// + /// Note that this will figure out the expected size in bytes of the content + /// being read by seeking to the end of the content and then back to wherever the + /// reading initially started. This is to enable a correctness check where the total + /// number of bytes hashed is checked against the expected length. If they do not + /// match, we return an [`Error`] rather than proceeding with a potentially-invalid + /// identifier. + /// + /// If you don't want this seeking to occur, you can use + /// [`ArtifactId::id_async_reader_with_length`] instead, which takes an explicit expected + /// length and checks against _that_ value, rather than inferring an expected length.
+ /// + /// Also note that this doesn't reset the reader to the beginning of its region; if + /// you provide a reader which has already read some portion of an underlying file or + /// has seeked to a point that's not the beginning, this function will continue reading + /// from that point, and the resulting hash will _not_ encompass the contents of the + /// entire file. You can use [`ArtifactId::id_async_reader_with_length`] and provide the + /// expected length of the full file in bytes to defend against this "partial hash" + /// error. + /// + /// Reads are buffered internally to reduce the number of syscalls and context switches + /// between the kernel and user code. + /// /// # Example /// /// ```rust @@ -121,19 +199,39 @@ impl ArtifactId { /// # use tokio::fs::File; /// # tokio_test::block_on(async { /// let mut file = File::open("test/data/hello_world.txt").await.unwrap(); - /// let id: ArtifactId = ArtifactId::from_async_reader(&mut file).await.unwrap(); + /// let id: ArtifactId = ArtifactId::id_async_reader(&mut file).await.unwrap(); /// println!("Artifact ID: {}", id); /// # }) /// ``` - pub async fn from_async_reader( + pub async fn id_async_reader( reader: R, ) -> Result> { let gitoid = GitOid::from_async_reader(reader).await?; - Ok(ArtifactId::from_gitoid(gitoid)) + Ok(ArtifactId::id_gitoid(gitoid)) } /// Construct an [`ArtifactId`] from an asynchronous reader with an expected length. /// + /// This reads the content of the reader and hashes it to produce an identifier. + /// + /// Reading is done asynchronously by the Tokio runtime. The specifics of how this + /// is done are based on the configuration of the runtime. + /// + /// This uses the `expected_length` to enable a correctness check where the total + /// number of bytes hashed is checked against the expected length. If they do not + /// match, we return an [`Error`] rather than proceeding with a potentially-invalid + /// identifier.
+ /// + /// Also note that this doesn't reset the reader to the beginning of its region; if + /// you provide a reader which has already read some portion of an underlying file or + /// has seeked to a point that's not the beginning, this function will continue reading + /// from that point, and the resulting hash will _not_ encompass the contents of the + /// entire file. Make sure to provide the expected number of bytes for the full file + /// to protect against this error. + /// + /// Reads are buffered internally to reduce the number of syscalls and context switches + /// between the kernel and user code. + /// /// # Example /// /// ```rust @@ -142,19 +240,30 @@ impl ArtifactId { /// # use tokio::fs::File; /// # tokio_test::block_on(async { /// let mut file = File::open("test/data/hello_world.txt").await.unwrap(); - /// let id: ArtifactId = ArtifactId::from_async_reader_with_length(&mut file, 11).await.unwrap(); + /// let id: ArtifactId = ArtifactId::id_async_reader_with_length(&mut file, 11).await.unwrap(); /// println!("Artifact ID: {}", id); /// # }) /// ``` - pub async fn from_async_reader_with_length( + pub async fn id_async_reader_with_length( reader: R, expected_length: usize, ) -> Result> { let gitoid = GitOid::from_async_reader_with_length(reader, expected_length).await?; - Ok(ArtifactId::from_gitoid(gitoid)) + Ok(ArtifactId::id_gitoid(gitoid)) } - /// Construct an [`ArtifactId`] from a [`Url`]. + /// Construct an [`ArtifactId`] from a `gitoid`-scheme [`Url`]. + /// + /// This validates that the provided URL has a hashing scheme which matches the one + /// selected for your [`ArtifactId`] (today, only `sha256` is supported), and has the + /// `blob` object type. It also validates that the provided hash is a valid hash for + /// the specified hashing scheme. If any of these checks fail, the function returns + /// an [`Error`]. + /// + /// Note that this expects a `gitoid`-scheme URL, as defined by IANA. 
This method + /// _does not_ expect an HTTP or HTTPS URL to access, retrieve contents, and hash + /// those contents to produce an identifier. You _can_ implement that yourself with + /// a Rust HTTP(S) crate and [`ArtifactId::id_bytes`]. /// /// # Example /// @@ -163,21 +272,23 @@ impl ArtifactId { /// # use omnibor::Sha256; /// # use url::Url; /// let url = Url::parse("gitoid:blob:sha256:fee53a18d32820613c0527aa79be5cb30173c823a9b448fa4817767cc84c6f03").unwrap(); - /// let id: ArtifactId = ArtifactId::from_url(url).unwrap(); + /// let id: ArtifactId = ArtifactId::id_url(url).unwrap(); /// println!("Artifact ID: {}", id); /// ``` - pub fn from_url(url: Url) -> Result> { + pub fn id_url(url: Url) -> Result> { ArtifactId::try_from(url) } /// Get the [`Url`] representation of the [`ArtifactId`]. /// + /// This returns a `gitoid`-scheme URL for the [`ArtifactId`]. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID URL: {}", id.url()); /// ``` pub fn url(&self) -> Url { @@ -186,12 +297,15 @@ impl ArtifactId { /// Get the underlying bytes of the [`ArtifactId`] hash. /// + /// This slice is the raw underlying buffer of the [`ArtifactId`], exactly + /// as produced by the hasher. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID bytes: {:?}", id.as_bytes()); /// ``` pub fn as_bytes(&self) -> &[u8] { @@ -200,12 +314,16 @@ impl ArtifactId { /// Get the bytes of the [`ArtifactId`] hash as a hexadecimal string. 
/// + /// This returns a [`String`] rather than [`str`] because the string must be + /// constructed on the fly, as we do not store a hexadecimal representation + /// of the hash data. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID bytes as hex: {}", id.as_hex()); /// ``` pub fn as_hex(&self) -> String { @@ -214,12 +332,14 @@ impl ArtifactId { /// Get the name of the hash algorithm used in the [`ArtifactId`] as a string. /// + /// For [`Sha256`], this is the string `"sha256"`. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID hash algorithm: {}", id.hash_algorithm()); /// ``` pub const fn hash_algorithm(&self) -> &'static str { @@ -228,12 +348,15 @@ impl ArtifactId { /// Get the object type used in the [`ArtifactId`] as a string. /// + /// For all [`ArtifactId`]s this is `"blob"`, but the method is provided + /// for completeness nonetheless. + /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID object type: {}", id.object_type()); /// ``` pub const fn object_type(&self) -> &'static str { @@ -242,12 +365,16 @@ impl ArtifactId { /// Get the length in bytes of the hash used in the [`ArtifactId`]. /// + /// In the future this method will be `const`, but is not able to be + /// today due to limitations in the Rust cryptography crates we use + /// internally. 
+ /// /// # Example /// /// ```rust /// # use omnibor::ArtifactId; /// # use omnibor::Sha256; - /// let id: ArtifactId = ArtifactId::from_str("hello, world"); + /// let id: ArtifactId = ArtifactId::id_str("hello, world"); /// println!("Artifact ID hash length in bytes: {}", id.hash_len()); /// ``` pub fn hash_len(&self) -> usize { @@ -259,7 +386,7 @@ impl FromStr for ArtifactId { type Err = Error; fn from_str(s: &str) -> Result> { - Ok(ArtifactId::from_str(s)) + Ok(ArtifactId::id_str(s)) } } @@ -319,6 +446,6 @@ impl TryFrom for ArtifactId { fn try_from(url: Url) -> Result> { let gitoid = GitOid::from_url(url)?; - Ok(ArtifactId::from_gitoid(gitoid)) + Ok(ArtifactId::id_gitoid(gitoid)) } } diff --git a/omnibor/src/lib.rs b/omnibor/src/lib.rs index bf9f30a..56b132c 100644 --- a/omnibor/src/lib.rs +++ b/omnibor/src/lib.rs @@ -1,4 +1,45 @@ -//! OmniBOR in Rust. +//! OmniBOR Artifact Identifiers and Artifact Input Manifests in Rust. +//! +//! ## What is OmniBOR? +//! +//! [OmniBOR][omnibor] is a draft specification which defines two key concepts: +//! +//! - __Artifact Identifiers__: independently-reproducible identifiers for +//! software artifacts. +//! - __Artifact Input Manifests__: record the IDs of every input used in the +//! build process for an artifact. +//! +//! Artifact IDs enable _anyone_ to identify and cross-reference information for +//! software artifacts without a central authority. Unlike [pURL][purl] or [CPE][cpe], +//! OmniBOR Artifact IDs don't rely on a third party; they are _inherent +//! identifiers_ determined only by an artifact itself. They're based on +//! [Git's Object IDs (GitOIDs)][gitoid] in both construction and choice of +//! cryptographic hash functions. +//! +//! Artifact Input Manifests allow consumers to reconstruct Artifact Dependency +//! Graphs that give _fine-grained_ visibility into how artifacts in your +//! software supply chain were made. With these graphs, consumers could +//!
in the future identify the presence of exact files associated with known +//! vulnerabilities, side-stepping the complexities of matching version numbers +//! across platforms and patching practices. +//! +//! [__You can view the OmniBOR specification here.__][omnibor_spec] +//! +//! The United States Cybersecurity & Infrastructure Security Agency (CISA) +//! identified OmniBOR as a major candidate for software identities +//! in its 2023 report ["Software Identification Ecosystem Option +//! Analysis."][cisa_report] +//! +//! [contributing]: CONTRIBUTING.md +//! [cbindgen]: https://github.com/eqrion/cbindgen +//! [cisa_report]: https://www.cisa.gov/sites/default/files/2023-10/Software-Identification-Ecosystem-Option-Analysis-508c.pdf +//! [cpe]: https://nvd.nist.gov/products/cpe +//! [gitoid]: https://git-scm.com/book/en/v2/Git-Internals-Git-Objects +//! [gitoid_crate]: https://crates.io/crates/gitoid +//! [omnibor]: https://omnibor.io +//! [omnibor_crate]: https://crates.io/crates/omnibor +//! [omnibor_spec]: https://github.com/omnibor/spec +//! [purl]: https://github.com/package-url/purl-spec pub(crate) mod sealed;