From 16a1e506475f24bf2988ba1dd7a321db7fd226f7 Mon Sep 17 00:00:00 2001 From: Philipp Schuster Date: Thu, 27 Apr 2023 17:51:52 +0200 Subject: [PATCH] multiboot2: use dst for tags where applicable This commit transforms a few tags where applicable to DSTs. This better fits into the Rust type system and makes a few things easier, such as parsing the cmdline string from the command line tag. Depending on how users used the tags, this change is not even breaking. Additionally, there is now a public trait which allows custom tag users to also benefit from DSTs. --- Cargo.lock | 21 ++++ multiboot2/Cargo.toml | 1 + multiboot2/Changelog.md | 7 ++ multiboot2/src/boot_loader_name.rs | 55 ++++++----- multiboot2/src/command_line.rs | 52 ++++++---- multiboot2/src/lib.rs | 148 +++++++++++++++++++---------- multiboot2/src/module.rs | 38 ++++---- multiboot2/src/tag_type.rs | 53 ++++++++++- 8 files changed, 267 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fb9c7456..262adbe6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -50,6 +50,7 @@ dependencies = [ "bitflags", "derive_more", "log", + "ptr_meta", ] [[package]] @@ -68,6 +69,26 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "ptr_meta" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcada80daa06c42ed5f48c9a043865edea5dc44cbf9ac009fda3b89526e28607" +dependencies = [ + "ptr_meta_derive", +] + +[[package]] +name = "ptr_meta_derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bca9224df2e20e7c5548aeb5f110a0f3b77ef05f8585139b7148b59056168ed2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quote" version = "1.0.26" diff --git a/multiboot2/Cargo.toml b/multiboot2/Cargo.toml index 748a9591..c323456c 100644 --- a/multiboot2/Cargo.toml +++ b/multiboot2/Cargo.toml @@ -40,3 +40,4 @@ unstable = [] bitflags = "1" derive_more = { version = "0.99", default-features = false, features = ["display"] } log = { version = "0.4", default-features = false } +ptr_meta = { version = "0.2.0", default-features = false } diff --git a/multiboot2/Changelog.md b/multiboot2/Changelog.md index 775483ef..f2e1be25 100644 --- a/multiboot2/Changelog.md +++ b/multiboot2/Changelog.md @@ -1,5 +1,12 @@ # CHANGELOG for crate `multiboot2` +## unreleased +- Add `TagTrait` trait which enables to use DSTs as multiboot2 tags. This is + mostly relevant for the command line tag, the modules tag, and the bootloader + name tag. However, this might also be relevant for users of custom multiboot2 + tags that use DSTs as types. See the example provided in the doc of the + `get_tag` method. + ## 0.15.1 (2023-03-18) - **BREAKING** `MemoryMapTag::all_memory_areas()` was renamed to `memory_areas` and now returns `MemoryAreaIter` instead of diff --git a/multiboot2/src/boot_loader_name.rs b/multiboot2/src/boot_loader_name.rs index ea2cb07c..cdf09e39 100644 --- a/multiboot2/src/boot_loader_name.rs +++ b/multiboot2/src/boot_loader_name.rs @@ -1,23 +1,24 @@ -use crate::TagTypeId; +use crate::{Tag, TagTypeId}; +use core::fmt::{Debug, Formatter}; use core::str::Utf8Error; -/// This tag contains the name of the bootloader that is booting the kernel. -/// -/// The name is a normal C-style UTF-8 zero-terminated string that can be -/// obtained via the `name` method. -#[derive(Clone, Copy, Debug)] +/// The bootloader name tag. +#[derive(ptr_meta::Pointee)] #[repr(C, packed)] // only repr(C) would add unwanted padding before first_section pub struct BootLoaderNameTag { typ: TagTypeId, size: u32, /// Null-terminated UTF-8 string - string: u8, + name: [u8], } impl BootLoaderNameTag { - /// Read the name of the bootloader that is booting the kernel. - /// This is an null-terminated UTF-8 string. If this returns `Err` then perhaps the memory - /// is invalid or the bootloader doesn't follow the spec. + /// Reads the name of the bootloader that is booting the kernel as Rust + /// string slice without the null-byte. + /// + /// For example, this returns `"GRUB 2.02~beta3-5"`. + /// + /// If the function returns `Err` then perhaps the memory is invalid. /// /// # Examples /// @@ -28,17 +29,31 @@ impl BootLoaderNameTag { /// } /// ``` pub fn name(&self) -> Result<&str, Utf8Error> { - use core::{mem, slice, str}; - // strlen without null byte - let strlen = self.size as usize - mem::size_of::(); - let bytes = unsafe { slice::from_raw_parts((&self.string) as *const u8, strlen) }; - str::from_utf8(bytes) + Tag::get_dst_str_slice(&self.name) + } +} + +impl Debug for BootLoaderNameTag { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("BootLoaderNameTag") + .field("typ", &{ self.typ }) + .field("size", &{ self.size }) + .field("name", &self.name()) + .finish() + } +} + +impl crate::TagTrait for BootLoaderNameTag { + fn dst_size(base_tag: &Tag) -> usize { + // The size of the sized portion of the bootloader name tag. + let tag_base_size = 8; + base_tag.size as usize - tag_base_size } } #[cfg(test)] mod tests { - use crate::TagType; + use crate::{BootLoaderNameTag, Tag, TagType}; const MSG: &str = "hello"; @@ -63,12 +78,8 @@ mod tests { #[test] fn test_parse_str() { let tag = get_bytes(); - let tag = unsafe { - tag.as_ptr() - .cast::() - .as_ref() - .unwrap() - }; + let tag = unsafe { &*tag.as_ptr().cast::() }; + let tag = tag.cast_tag::(); assert_eq!({ tag.typ }, TagType::BootLoaderName); assert_eq!(tag.name().expect("must be valid UTF-8"), MSG); } diff --git a/multiboot2/src/command_line.rs b/multiboot2/src/command_line.rs index 978cbe0a..6bb0d78d 100644 --- a/multiboot2/src/command_line.rs +++ b/multiboot2/src/command_line.rs @@ -1,27 +1,30 @@ //! Module for [CommandLineTag]. -use crate::TagTypeId; -use core::mem; -use core::slice; +use crate::{Tag, TagTrait, TagTypeId}; +use core::fmt::{Debug, Formatter}; use core::str; /// This tag contains the command line string. /// /// The string is a normal C-style UTF-8 zero-terminated string that can be /// obtained via the `command_line` method. -#[derive(Clone, Copy, Debug)] #[repr(C, packed)] // only repr(C) would add unwanted padding before first_section +#[derive(ptr_meta::Pointee)] pub struct CommandLineTag { typ: TagTypeId, size: u32, /// Null-terminated UTF-8 string - string: u8, + cmdline: [u8], } impl CommandLineTag { - /// Read the command line string that is being passed to the booting kernel. - /// This is an null-terminated UTF-8 string. If this returns `Err` then perhaps the memory - /// is invalid or the bootloader doesn't follow the spec. + /// Reads the command line of the kernel as Rust string slice without + /// the null-byte. + /// + /// For example, this returns `"console=ttyS0"`.if the GRUB config + /// contains `"multiboot2 /mykernel console=ttyS0"`. + /// + /// If the function returns `Err` then perhaps the memory is invalid. /// /// # Examples /// @@ -33,16 +36,31 @@ impl CommandLineTag { /// } /// ``` pub fn command_line(&self) -> Result<&str, str::Utf8Error> { - // strlen without null byte - let strlen = self.size as usize - mem::size_of::(); - let bytes = unsafe { slice::from_raw_parts((&self.string) as *const u8, strlen) }; - str::from_utf8(bytes) + Tag::get_dst_str_slice(&self.cmdline) + } +} + +impl Debug for CommandLineTag { + fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { + f.debug_struct("CommandLineTag") + .field("typ", &{ self.typ }) + .field("size", &{ self.size }) + .field("cmdline", &self.command_line()) + .finish() + } +} + +impl TagTrait for CommandLineTag { + fn dst_size(base_tag: &Tag) -> usize { + // The size of the sized portion of the command line tag. + let tag_base_size = 8; + base_tag.size as usize - tag_base_size } } #[cfg(test)] mod tests { - use crate::TagType; + use crate::{CommandLineTag, Tag, TagType}; const MSG: &str = "hello"; @@ -67,12 +85,8 @@ mod tests { #[test] fn test_parse_str() { let tag = get_bytes(); - let tag = unsafe { - tag.as_ptr() - .cast::() - .as_ref() - .unwrap() - }; + let tag = unsafe { &*tag.as_ptr().cast::() }; + let tag = tag.cast_tag::(); assert_eq!({ tag.typ }, TagType::Cmdline); assert_eq!(tag.command_line().expect("must be valid UTF-8"), MSG); } diff --git a/multiboot2/src/lib.rs b/multiboot2/src/lib.rs index 5f97c98b..06db53cc 100644 --- a/multiboot2/src/lib.rs +++ b/multiboot2/src/lib.rs @@ -39,6 +39,8 @@ extern crate std; use core::fmt; use derive_more::Display; +// Must be public so that custom tags can be DSTs. +pub use ptr_meta::Pointee; use crate::framebuffer::UnknownFramebufferType; pub use boot_loader_name::BootLoaderNameTag; @@ -319,39 +321,54 @@ impl BootInformation { /// [`Self::efi_64_ih`]. /// /// ## Use Custom Tags - /// The following example shows how you may use this interface to parse custom tags from - /// the MBI. Custom tags must be `Sized`. Hence, they are not allowed to contain a field such - /// as `name: [u8]`. + /// The following example shows how you may use this interface to parse + /// custom tags from the MBI. If they are dynamically sized (DST), a few more + /// special handling is required. This is reflected by code-comments. /// - /// **Belows example needs Rust 1.64 or newer because of std::ffi imports!** - /// ```ignore - /// use std::ffi::{c_char, CStr}; - /// use multiboot2::TagTypeId; + /// ```no_run + /// use std::str::Utf8Error; + /// use multiboot2::{Tag, TagTrait, TagTypeId}; /// /// #[repr(C, align(8))] - /// struct CustomTag { + /// #[derive(multiboot2::Pointee)] // Only needed for DSTs. + /// struct CustomTag { /// // new type from the lib: has repr(u32) /// tag: TagTypeId, /// size: u32, /// // begin of inline string - /// name: u8, + /// name: [u8], + /// } + /// + /// // This implementation is only necessary for tags that are DSTs. + /// impl TagTrait for CustomTag { + /// fn dst_size(base_tag: &Tag) -> usize { + /// // The size of the sized portion of the custom tag. + /// let tag_base_size = 8; // id + size is 8 byte in size + /// base_tag.size as usize - tag_base_size + /// } + /// } + /// + /// impl CustomTag { + /// fn name(&self) -> Result<&str, Utf8Error> { + /// Tag::get_dst_str_slice(&self.name) + /// } /// } /// /// let mbi = unsafe { multiboot2::load(0xdeadbeef).unwrap() }; /// /// let tag = mbi - /// // type definition from end user; must be `Sized`! /// .get_tag::(0x1337) /// .unwrap(); - /// let name = &tag.name as *const u8 as *const c_char; - /// let str = unsafe { CStr::from_ptr(name).to_str().unwrap() }; - /// assert_eq!(str, "name"); + /// assert_eq!(tag.name(), Ok("name")); /// ``` - pub fn get_tag>(&self, typ: TagType) -> Option<&Tag> { + pub fn get_tag>( + &self, + typ: TagType, + ) -> Option<&TagT> { let typ = typ.into(); self.tags() .find(|tag| tag.typ == typ) - .map(|tag| tag.cast_tag::()) + .map(|tag| tag.cast_tag::()) } fn tags(&self) -> TagIter { @@ -476,10 +493,49 @@ impl Reader { } } +/// A trait to abstract over all sized and unsized tags (DSTs). For sized tags, +/// this trait does not much. For DSTs, a `TagTrait::dst_size` implementation +/// must me provided, which returns the right size hint for the dynamically +/// sized portion of the struct. +/// +/// The [`TagTrait::from_base_tag`] method has a default implementation for all +/// tags that are `Sized`. +/// +/// # Trivia +/// This crate uses the [`Pointee`]-abstraction of the [`ptr_meta`] crate to +/// create fat pointers. +pub trait TagTrait: Pointee { + /// Returns + fn dst_size(base_tag: &Tag) -> Self::Metadata; + + /// Creates a reference to a (dynamically sized) tag type in a safe way. + /// DST tags need to implement a proper [`Self::dst_size`] implementation. + /// + /// # Safety + /// Callers must be sure that the "size" field of the provided [`Tag`] is + /// sane and the underlying memory valid. The implementation of this trait + /// **must have** a correct [`Self::dst_size`] implementation. + unsafe fn from_base_tag<'a>(tag: &Tag) -> &'a Self { + let ptr = tag as *const _ as *const (); + let ptr = ptr_meta::from_raw_parts(ptr, Self::dst_size(tag)); + &*ptr + } +} + +// All sizes tags automatically have a Pointee implementation where +// Pointee::Metadata is (). Hence, the TagTrait is implemented automatically for +// all tags that are sized. +impl> TagTrait for T { + #[allow(clippy::unused_unit)] + fn dst_size(_: &Tag) -> Self::Metadata { + () + } +} + #[cfg(test)] mod tests { use super::*; - use std::{mem, slice}; + use std::slice; #[test] fn no_tags() { @@ -585,14 +641,6 @@ mod tests { assert!(bi.command_line_tag().is_none()); } - #[test] - /// Compile time test for [`BootLoaderNameTag`]. - fn name_tag_size() { - unsafe { - core::mem::transmute::<[u8; 9], BootLoaderNameTag>([0u8; 9]); - } - } - #[test] fn framebuffer_tag_rgb() { // direct RGB mode test: @@ -1490,46 +1538,54 @@ mod tests { consumer(MbiLoadError::IllegalAddress) } + /// Example for a custom tag without using a DST. #[test] - fn custom_tag() { + fn get_custom_tag_no_dst_from_mbi() { const CUSTOM_TAG_ID: u32 = 0x1337; #[repr(C, align(8))] - struct Bytes([u8; 32]); - let bytes: Bytes = Bytes([ + struct CustomTag { + tag: TagTypeId, + size: u32, + name: u8, + } + + #[repr(C, align(8))] + struct AlignedBytes([u8; 32]); + // Raw bytes of a MBI that only contains the custom tag. + let bytes: AlignedBytes = AlignedBytes([ 32, 0, 0, - 0, // total_size + 0, // end: total size 0, 0, 0, - 0, // reserved - // my custom tag + 0, // end: padding; end of multiboot2 boot information begin CUSTOM_TAG_ID.to_ne_bytes()[0], CUSTOM_TAG_ID.to_ne_bytes()[1], CUSTOM_TAG_ID.to_ne_bytes()[2], - CUSTOM_TAG_ID.to_ne_bytes()[3], + CUSTOM_TAG_ID.to_ne_bytes()[3], // end: my custom tag 13, 0, 0, - 0, // tag size + 0, // end: tag size 110, 97, 109, - 101, // ASCII string 'name' + 101, + 0, // end: null-terminated ASCII string 'name' 0, 0, + 0, // 3 byte padding 0, - 0, // null byte + padding 0, 0, - 0, - 0, // end tag type + 0, // end: end tag type 8, 0, 0, - 0, // end tag size + 0, // end: end tag size ]); let addr = bytes.0.as_ptr() as usize; let bi = unsafe { load(addr) }; @@ -1538,20 +1594,14 @@ mod tests { assert_eq!(addr + bytes.0.len(), bi.end_address()); assert_eq!(bytes.0.len(), bi.total_size()); - #[repr(C, align(8))] - struct CustomTag { - tag: TagTypeId, - size: u32, - name: u8, - } - let tag = bi.get_tag::(CUSTOM_TAG_ID).unwrap(); - // strlen without null byte - let strlen = tag.size as usize - mem::size_of::(); - let bytes = unsafe { slice::from_raw_parts((&tag.name) as *const u8, strlen) }; - let name = core::str::from_utf8(bytes).unwrap(); - assert_eq!(name, "name"); + let custom_tag_base_size = 8; + // bytes with null byte + let c_str_bytes = tag.size as usize - custom_tag_base_size; + let bytes = unsafe { slice::from_raw_parts((&tag.name) as *const u8, c_str_bytes) }; + let name = Tag::get_dst_str_slice(bytes); + assert_eq!(name, Ok("name")); } /// Tests that `get_tag` can consume multiple types that implement `Into` diff --git a/multiboot2/src/module.rs b/multiboot2/src/module.rs index 555cf89a..91f06e42 100644 --- a/multiboot2/src/module.rs +++ b/multiboot2/src/module.rs @@ -5,31 +5,27 @@ use core::str::Utf8Error; /// This tag indicates to the kernel what boot module was loaded along with /// the kernel image, and where it can be found. -#[derive(Clone, Copy)] #[repr(C, packed)] // only repr(C) would add unwanted padding near name_byte. +#[derive(ptr_meta::Pointee)] pub struct ModuleTag { typ: TagTypeId, size: u32, mod_start: u32, mod_end: u32, /// Null-terminated UTF-8 string - cmdline_str: u8, + cmdline: [u8], } impl ModuleTag { - /// Returns the cmdline of the module. - /// This is an null-terminated UTF-8 string. If this returns `Err` then perhaps the memory - /// is invalid or the bootloader doesn't follow the spec. + /// Reads the command line of the boot module as Rust string slice without + /// the null-byte. /// - /// For example: If the GRUB configuration contains - /// `module2 /foobar/some_boot_module --test cmdline-option` then this method - /// will return `--test cmdline-option`. + /// For example, this returns `"--test cmdline-option"`.if the GRUB config + /// contains `"module2 /some_boot_module --test cmdline-option"`. + /// + /// If the function returns `Err` then perhaps the memory is invalid. pub fn cmdline(&self) -> Result<&str, Utf8Error> { - use core::{mem, slice, str}; - // strlen without null byte - let strlen = self.size as usize - mem::size_of::(); - let bytes = unsafe { slice::from_raw_parts((&self.cmdline_str) as *const u8, strlen) }; - str::from_utf8(bytes) + Tag::get_dst_str_slice(&self.cmdline) } /// Start address of the module. @@ -48,12 +44,21 @@ impl ModuleTag { } } +impl crate::TagTrait for ModuleTag { + fn dst_size(base_tag: &Tag) -> usize { + // The size of the sized portion of the module tag. + let tag_base_size = 16; + base_tag.size as usize - tag_base_size + } +} + impl Debug for ModuleTag { fn fmt(&self, f: &mut Formatter<'_>) -> core::fmt::Result { f.debug_struct("ModuleTag") .field("type", &{ self.typ }) .field("size (tag)", &{ self.size }) .field("size (module)", &self.module_size()) + // Trick to print as hex. .field("mod_start", &(self.mod_start as *const usize)) .field("mod_end", &(self.mod_end as *const usize)) .field("cmdline", &self.cmdline()) @@ -77,7 +82,7 @@ impl<'a> Iterator for ModuleIter<'a> { fn next(&mut self) -> Option<&'a ModuleTag> { self.iter .find(|tag| tag.typ == TagType::Module) - .map(|tag| unsafe { &*(tag as *const Tag as *const ModuleTag) }) + .map(|tag| tag.cast_tag()) } } @@ -93,7 +98,7 @@ impl<'a> Debug for ModuleIter<'a> { #[cfg(test)] mod tests { - use crate::TagType; + use crate::{ModuleTag, Tag, TagType}; const MSG: &str = "hello"; @@ -121,7 +126,8 @@ mod tests { #[test] fn test_parse_str() { let tag = get_bytes(); - let tag = unsafe { tag.as_ptr().cast::().as_ref().unwrap() }; + let tag = unsafe { &*tag.as_ptr().cast::() }; + let tag = tag.cast_tag::(); assert_eq!({ tag.typ }, TagType::Module); assert_eq!(tag.cmdline().expect("must be valid UTF-8"), MSG); } diff --git a/multiboot2/src/tag_type.rs b/multiboot2/src/tag_type.rs index 3d5132c3..c8bd22f4 100644 --- a/multiboot2/src/tag_type.rs +++ b/multiboot2/src/tag_type.rs @@ -5,9 +5,11 @@ //! - [`TagType`] //! - [`Tag`] +use crate::TagTrait; use core::fmt::{Debug, Formatter}; use core::hash::Hash; use core::marker::PhantomData; +use core::str::Utf8Error; /// Serialized form of [`TagType`] that matches the binary representation /// (`u32`). The abstraction corresponds to the `typ`/`type` field of a @@ -284,6 +286,9 @@ mod partial_eq_impls { /// Common base structure for all tags that can be passed via the Multiboot2 /// information structure (MBI) to a Multiboot2 payload/program/kernel. /// +/// Can be transformed to any other tag (sized or unsized/DST) via +/// [`Tag::cast_tag`]. +/// /// Do not confuse them with the Multiboot2 header tags. They are something /// different. #[derive(Clone, Copy)] @@ -296,8 +301,35 @@ pub struct Tag { impl Tag { /// Casts the base tag to the specific tag type. - pub fn cast_tag<'a, T>(&self) -> &'a T { - unsafe { &*(self as *const Tag as *const T) } + pub fn cast_tag<'a, T: TagTrait + ?Sized>(&self) -> &'a T { + // Safety: At this point, we trust that "self.size" and the size hint + // for DST tags are sane. + unsafe { TagTrait::from_base_tag(self) } + } + + /// Some multiboot2 tags are a DST as they end with a dynamically sized byte + /// slice. This function parses this slice as [`str`] so that either a valid + /// UTF-8 Rust string slice without a terminating null byte or an error is + /// returned. + pub fn get_dst_str_slice(bytes: &[u8]) -> Result<&str, Utf8Error> { + if bytes.is_empty() { + // Very unlikely. A sane bootloader would omit the tag in this case. + // But better be safe. + return Ok(""); + } + + // Return without a trailing null byte. By spec, the null byte should + // always terminate the string. However, for safety, we do make an extra + // check. + let str_slice = if bytes.ends_with(&[b'\0']) { + let str_len = bytes.len() - 1; + &bytes[0..str_len] + } else { + // Unlikely that a bootloader doesn't follow the spec and does not + // add a terminating null byte. + bytes + }; + core::str::from_utf8(str_slice) } } @@ -429,4 +461,21 @@ mod tests { assert_eq!(tag_type_from_id, tag_type_from_u16) } } + + #[test] + fn test_get_dst_str_slice() { + // unlikely case + assert_eq!(Ok(""), Tag::get_dst_str_slice(&[])); + // also unlikely case + assert_eq!(Ok(""), Tag::get_dst_str_slice(&[b'\0'])); + // unlikely case: missing null byte. but the lib can cope with that + assert_eq!(Ok("foobar"), Tag::get_dst_str_slice("foobar".as_bytes())); + // test that the null bytes is not included in the string slice + assert_eq!(Ok("foobar"), Tag::get_dst_str_slice("foobar\0".as_bytes())); + // test invalid utf8 + assert!(matches!( + Tag::get_dst_str_slice(&[0xff, 0xff]), + Err(Utf8Error { .. }) + )); + } }