diff --git a/crates/examples/Cargo.toml b/crates/examples/Cargo.toml index fa74cbd8..4f21f983 100644 --- a/crates/examples/Cargo.toml +++ b/crates/examples/Cargo.toml @@ -8,9 +8,9 @@ gimli = { path = "../..", default-features = false } crossbeam = "0.8" fallible-iterator = { version = "0.3.0", default-features = false, optional = true } getopts = "0.2" -memmap2 = "0.7.1" +memmap2 = "0.9.4" num_cpus = "1" -object = { version = "0.32.0", features = ["wasm"] } +object = { version = "0.35.0", features = ["wasm", "write"] } rayon = "1.0" regex = "1" typed-arena = "2" @@ -19,8 +19,9 @@ typed-arena = "2" read = ["gimli/read"] read-all = ["read", "std", "fallible-iterator"] fallible-iterator = ["dep:fallible-iterator", "gimli/fallible-iterator"] +write = ["gimli/write"] std = ["gimli/std"] -default = ["read", "std", "fallible-iterator"] +default = ["read-all", "write"] [[bin]] name = "simple" @@ -30,6 +31,10 @@ required-features = ["read", "std"] name = "simple_line" required-features = ["read", "std"] +[[bin]] +name = "simple_write" +required-features = ["write"] + [[bin]] name = "dwarfdump" required-features = ["read", "std", "fallible-iterator"] diff --git a/crates/examples/src/bin/dwarfdump.rs b/crates/examples/src/bin/dwarfdump.rs index 2902f5df..c9969996 100644 --- a/crates/examples/src/bin/dwarfdump.rs +++ b/crates/examples/src/bin/dwarfdump.rs @@ -3,7 +3,7 @@ use fallible_iterator::FallibleIterator; use gimli::{Section, UnitHeader, UnitOffset, UnitSectionOffset, UnitType, UnwindSection}; -use object::{Object, ObjectSection, ObjectSymbol}; +use object::{Object, ObjectSection}; use regex::bytes::Regex; use std::borrow::Cow; use std::cmp::min; @@ -154,202 +154,36 @@ impl<'input, Endian> Reader for gimli::EndianSlice<'input, Endian> where { } -type RelocationMap = HashMap; +#[derive(Debug, Default)] +struct RelocationMap(object::read::RelocationMap); -fn add_relocations( - relocations: &mut RelocationMap, - file: &object::File, - section: &object::Section, -) { - for 
(offset64, mut relocation) in section.relocations() { - let offset = offset64 as usize; - if offset as u64 != offset64 { - continue; - } - // There are other things we could match but currently don't - #[allow(clippy::single_match)] - match relocation.kind() { - object::RelocationKind::Absolute => { - match relocation.target() { - object::RelocationTarget::Symbol(symbol_idx) => { - match file.symbol_by_index(symbol_idx) { - Ok(symbol) => { - let addend = - symbol.address().wrapping_add(relocation.addend() as u64); - relocation.set_addend(addend as i64); - } - Err(_) => { - eprintln!( - "Relocation with invalid symbol for section {} at offset 0x{:08x}", - section.name().unwrap(), - offset - ); - } - } - } - _ => {} - } - if relocations.insert(offset, relocation).is_some() { - eprintln!( - "Multiple relocations for section {} at offset 0x{:08x}", - section.name().unwrap(), - offset - ); - } - } - _ => { +impl RelocationMap { + fn add(&mut self, file: &object::File, section: &object::Section) { + for (offset, relocation) in section.relocations() { + if let Err(e) = self.0.add(file, offset, relocation) { eprintln!( - "Unsupported relocation for section {} at offset 0x{:08x}", + "Relocation error for section {} at offset 0x{:08x}: {}", section.name().unwrap(), - offset + offset, + e ); } } } } -/// Apply relocations to addresses and offsets during parsing, -/// instead of requiring the data to be fully relocated prior -/// to parsing. 
-/// -/// Pros -/// - allows readonly buffers, we don't need to implement writing of values back to buffers -/// - potentially allows us to handle addresses and offsets differently -/// - potentially allows us to add metadata from the relocation (eg symbol names) -/// Cons -/// - maybe incomplete -#[derive(Debug, Clone)] -struct Relocate<'a, R: gimli::Reader> { - relocations: &'a RelocationMap, - section: R, - reader: R, -} - -impl<'a, R: gimli::Reader> Relocate<'a, R> { - fn relocate(&self, offset: usize, value: u64) -> u64 { - if let Some(relocation) = self.relocations.get(&offset) { - // There are other things we could match but currently don't - #[allow(clippy::single_match)] - match relocation.kind() { - object::RelocationKind::Absolute => { - if relocation.has_implicit_addend() { - // Use the explicit addend too, because it may have the symbol value. - return value.wrapping_add(relocation.addend() as u64); - } else { - return relocation.addend() as u64; - } - } - _ => {} - } - }; - value - } -} - -impl<'a, R: gimli::Reader> gimli::Reader for Relocate<'a, R> { - type Endian = R::Endian; - type Offset = R::Offset; - - fn read_address(&mut self, address_size: u8) -> gimli::Result { - let offset = self.reader.offset_from(&self.section); - let value = self.reader.read_address(address_size)?; - Ok(self.relocate(offset, value)) - } - - fn read_length(&mut self, format: gimli::Format) -> gimli::Result { - let offset = self.reader.offset_from(&self.section); - let value = self.reader.read_length(format)?; - ::from_u64(self.relocate(offset, value as u64)) - } - - fn read_offset(&mut self, format: gimli::Format) -> gimli::Result { - let offset = self.reader.offset_from(&self.section); - let value = self.reader.read_offset(format)?; - ::from_u64(self.relocate(offset, value as u64)) - } - - fn read_sized_offset(&mut self, size: u8) -> gimli::Result { - let offset = self.reader.offset_from(&self.section); - let value = self.reader.read_sized_offset(size)?; - 
::from_u64(self.relocate(offset, value as u64)) - } - - #[inline] - fn split(&mut self, len: Self::Offset) -> gimli::Result { - let mut other = self.clone(); - other.reader.truncate(len)?; - self.reader.skip(len)?; - Ok(other) - } - - // All remaining methods simply delegate to `self.reader`. - - #[inline] - fn endian(&self) -> Self::Endian { - self.reader.endian() - } - - #[inline] - fn len(&self) -> Self::Offset { - self.reader.len() - } - - #[inline] - fn empty(&mut self) { - self.reader.empty() - } - - #[inline] - fn truncate(&mut self, len: Self::Offset) -> gimli::Result<()> { - self.reader.truncate(len) +impl<'a> gimli::read::Relocate for &'a RelocationMap { + fn relocate_address(&self, offset: usize, value: u64) -> gimli::Result { + Ok(self.0.relocate(offset as u64, value)) } - #[inline] - fn offset_from(&self, base: &Self) -> Self::Offset { - self.reader.offset_from(&base.reader) - } - - #[inline] - fn offset_id(&self) -> gimli::ReaderOffsetId { - self.reader.offset_id() - } - - #[inline] - fn lookup_offset_id(&self, id: gimli::ReaderOffsetId) -> Option { - self.reader.lookup_offset_id(id) - } - - #[inline] - fn find(&self, byte: u8) -> gimli::Result { - self.reader.find(byte) - } - - #[inline] - fn skip(&mut self, len: Self::Offset) -> gimli::Result<()> { - self.reader.skip(len) - } - - #[inline] - fn to_slice(&self) -> gimli::Result> { - self.reader.to_slice() - } - - #[inline] - fn to_string(&self) -> gimli::Result> { - self.reader.to_string() - } - - #[inline] - fn to_string_lossy(&self) -> gimli::Result> { - self.reader.to_string_lossy() - } - - #[inline] - fn read_slice(&mut self, buf: &mut [u8]) -> gimli::Result<()> { - self.reader.read_slice(buf) + fn relocate_offset(&self, offset: usize, value: usize) -> gimli::Result { + ::from_u64(self.0.relocate(offset as u64, value as u64)) } } +type Relocate<'a, R> = gimli::RelocateReader; + impl<'a, R: Reader> Reader for Relocate<'a, R> {} #[derive(Default)] @@ -561,21 +395,6 @@ fn main() { } } -fn 
empty_file_section( - endian: Endian, - arena_relocations: &Arena, -) -> Relocate<'_, gimli::EndianSlice<'_, Endian>> { - let reader = gimli::EndianSlice::new(&[], endian); - let section = reader; - let relocations = RelocationMap::default(); - let relocations = arena_relocations.alloc(relocations); - Relocate { - relocations, - section, - reader, - } -} - fn load_file_section<'input, 'arena, Endian: gimli::Endianity>( id: gimli::SectionId, file: &object::File<'input>, @@ -597,7 +416,7 @@ fn load_file_section<'input, 'arena, Endian: gimli::Endianity>( Some(ref section) => { // DWO sections never have relocations, so don't bother. if !is_dwo { - add_relocations(&mut relocations, file, section); + relocations.add(file, section); } section.uncompressed_data()? } @@ -605,14 +424,9 @@ fn load_file_section<'input, 'arena, Endian: gimli::Endianity>( None => Cow::Owned(Vec::with_capacity(1)), }; let data_ref = arena_data.alloc(data); - let reader = gimli::EndianSlice::new(data_ref, endian); - let section = reader; + let section = gimli::EndianSlice::new(data_ref, endian); let relocations = arena_relocations.alloc(relocations); - Ok(Relocate { - relocations, - section, - reader, - }) + Ok(Relocate::new(section, relocations)) } fn dump_file(file: &object::File, endian: Endian, flags: &Flags) -> Result<()> @@ -672,7 +486,8 @@ where let w = &mut BufWriter::new(io::stdout()); if flags.dwp { - let empty = empty_file_section(endian, &arena_relocations); + let empty_relocations = arena_relocations.alloc(RelocationMap::default()); + let empty = Relocate::new(gimli::EndianSlice::new(&[], endian), empty_relocations); let dwp = gimli::DwarfPackage::load(&mut load_section, empty)?; dump_dwp(w, &dwp, dwo_parent.unwrap(), dwo_parent_units, flags)?; w.flush()?; diff --git a/crates/examples/src/bin/simple.rs b/crates/examples/src/bin/simple.rs index 046747d3..c55f7e67 100644 --- a/crates/examples/src/bin/simple.rs +++ b/crates/examples/src/bin/simple.rs @@ -7,9 +7,38 @@ //! 
Most of the complexity is due to loading the sections from the object //! file and DWP file, which is not something that is provided by gimli itself. +use gimli::Reader as _; use object::{Object, ObjectSection}; use std::{borrow, env, error, fs}; +// This is a simple wrapper around `object::read::RelocationMap` that implements +// `gimli::read::Relocate` for use with `gimli::RelocateReader`. +// You only need this if you are parsing relocatable object files. +#[derive(Debug, Default)] +struct RelocationMap(object::read::RelocationMap); + +impl<'a> gimli::read::Relocate for &'a RelocationMap { + fn relocate_address(&self, offset: usize, value: u64) -> gimli::Result { + Ok(self.0.relocate(offset as u64, value)) + } + + fn relocate_offset(&self, offset: usize, value: usize) -> gimli::Result { + ::from_u64(self.0.relocate(offset as u64, value as u64)) + } +} + +// The section data that will be stored in `DwarfSections` and `DwarfPackageSections`. +#[derive(Default)] +struct Section<'data> { + data: borrow::Cow<'data, [u8]>, + relocations: RelocationMap, +} + +// The reader type that will be stored in `Dwarf` and `DwarfPackage`. +// If you don't need relocations, you can use `gimli::EndianSlice` directly. +type Reader<'data> = + gimli::RelocateReader, &'data RelocationMap>; + fn main() { let mut args = env::args(); if args.len() != 2 && args.len() != 3 { @@ -46,19 +75,28 @@ fn dump_file( dwp_object: Option<&object::File>, endian: gimli::RunTimeEndian, ) -> Result<(), Box> { - // Load a section and return as `Cow<[u8]>`. - fn load_section<'a>( - object: &'a object::File, + // Load a `Section` that may own its data. 
+ fn load_section<'data>( + object: &object::File<'data>, name: &str, - ) -> Result, Box> { + ) -> Result, Box> { Ok(match object.section_by_name(name) { - Some(section) => section.uncompressed_data()?, - None => borrow::Cow::Borrowed(&[]), + Some(section) => Section { + data: section.uncompressed_data()?, + relocations: section.relocation_map().map(RelocationMap)?, + }, + None => Default::default(), }) } - // Borrow a `Cow<[u8]>` to create an `EndianSlice`. - let borrow_section = |section| gimli::EndianSlice::new(borrow::Cow::as_ref(section), endian); + // Borrow a `Section` to create a `Reader`. + fn borrow_section<'data>( + section: &'data Section<'data>, + endian: gimli::RunTimeEndian, + ) -> Reader<'data> { + let slice = gimli::EndianSlice::new(borrow::Cow::as_ref(&section.data), endian); + gimli::RelocateReader::new(slice, &section.relocations) + } // Load all of the sections. let dwarf_sections = gimli::DwarfSections::load(|id| load_section(object, id.name()))?; @@ -68,13 +106,17 @@ fn dump_file( }) .transpose()?; - // Create `EndianSlice`s for all of the sections and do preliminary parsing. + let empty_relocations = RelocationMap::default(); + let empty_section = + gimli::RelocateReader::new(gimli::EndianSlice::new(&[], endian), &empty_relocations); + + // Create `Reader`s for all of the sections and do preliminary parsing. // Alternatively, we could have used `Dwarf::load` with an owned type such as `EndianRcSlice`. - let dwarf = dwarf_sections.borrow(borrow_section); + let dwarf = dwarf_sections.borrow(|section| borrow_section(section, endian)); let dwp = dwp_sections .as_ref() .map(|dwp_sections| { - dwp_sections.borrow(borrow_section, gimli::EndianSlice::new(&[], endian)) + dwp_sections.borrow(|section| borrow_section(section, endian), empty_section) }) .transpose()?; @@ -86,7 +128,7 @@ fn dump_file( header.offset().as_debug_info_offset().unwrap().0 ); let unit = dwarf.unit(header)?; - dump_unit(&unit)?; + dump_unit(&dwarf, &unit)?; // Check for a DWO unit. 
let Some(dwp) = &dwp else { continue }; @@ -99,15 +141,13 @@ fn dump_file( continue; }; let unit = dwo.unit(header)?; - dump_unit(&unit)?; + dump_unit(&dwo, &unit)?; } Ok(()) } -fn dump_unit( - unit: &gimli::Unit>, -) -> Result<(), gimli::Error> { +fn dump_unit(dwarf: &gimli::Dwarf, unit: &gimli::Unit) -> Result<(), gimli::Error> { // Iterate over the Debugging Information Entries (DIEs) in the unit. let mut depth = 0; let mut entries = unit.entries(); @@ -118,7 +158,11 @@ fn dump_unit( // Iterate over the attributes in the DIE. let mut attrs = entry.attrs(); while let Some(attr) = attrs.next()? { - println!(" {}: {:?}", attr.name(), attr.value()); + print!(" {}: {:?}", attr.name(), attr.value()); + if let Ok(s) = dwarf.attr_string(unit, attr.value()) { + print!(" '{}'", s.to_string_lossy()?); + } + println!(); } } Ok(()) diff --git a/crates/examples/src/bin/simple_write.rs b/crates/examples/src/bin/simple_write.rs new file mode 100644 index 00000000..e66744fa --- /dev/null +++ b/crates/examples/src/bin/simple_write.rs @@ -0,0 +1,311 @@ +//! A small example for writing an object file containing DWARF sections. +//! +//! The resulting object file can be linked with a C runtime to create a complete executable: +//! ```sh +//! $ cargo run --bin simple_write +//! $ gcc -o hello hello.o +//! $ ./hello +//! Hello, world! +//! ``` +use gimli::write::{ + Address, AttributeValue, DwarfUnit, EndianVec, LineProgram, LineString, Range, RangeList, + RelocateWriter, Relocation, RelocationTarget, Sections, Writer, +}; +use gimli::{Encoding, Format, LineEncoding, LittleEndian}; + +/// Record information needed to write a section. 
+#[derive(Clone)] +struct Section { + data: EndianVec, + relocations: Vec, + id: Option, +} + +impl Section { + fn new() -> Self { + Self { + data: EndianVec::new(LittleEndian), + relocations: Vec::new(), + id: None, + } + } +} + +impl RelocateWriter for Section { + type Writer = EndianVec; + + fn writer(&self) -> &Self::Writer { + &self.data + } + + fn writer_mut(&mut self) -> &mut Self::Writer { + &mut self.data + } + + fn relocate(&mut self, relocation: Relocation) { + self.relocations.push(relocation); + } +} + +fn main() -> Result<(), Box> { + let mut obj = object::write::Object::new( + object::BinaryFormat::native_object(), + object::Architecture::X86_64, + object::Endianness::Little, + ); + + let comp_dir = *b"/tmp"; + let file_name = *b"hello.c"; + let main_name = *b"main"; + + let (main_symbol, main_size) = define_main(&mut obj)?; + let main_address = Address::Symbol { + // This is a user defined identifier for the symbol. + // In this case, we will use 0 to mean the main function. + symbol: 0, + addend: 0, + }; + + // Choose the encoding parameters. + let encoding = Encoding { + format: Format::Dwarf32, + version: 5, + address_size: 8, + }; + + // Create a container for a single compilation unit. + let mut dwarf = DwarfUnit::new(encoding); + + // Set attributes on the root DIE. 
+ let range_list_id = dwarf.unit.ranges.add(RangeList(vec![Range::StartLength { + begin: main_address, + length: obj.symbol(main_symbol).size, + }])); + let root = dwarf.unit.root(); + let entry = dwarf.unit.get_mut(root); + entry.set( + gimli::DW_AT_producer, + AttributeValue::String((*b"gimli example").into()), + ); + entry.set( + gimli::DW_AT_language, + AttributeValue::Language(gimli::DW_LANG_C11), + ); + entry.set(gimli::DW_AT_name, AttributeValue::String(file_name.into())); + entry.set( + gimli::DW_AT_comp_dir, + AttributeValue::String(comp_dir.into()), + ); + entry.set(gimli::DW_AT_low_pc, AttributeValue::Address(main_address)); + entry.set( + gimli::DW_AT_ranges, + AttributeValue::RangeListRef(range_list_id), + ); + // DW_AT_stmt_list will be set automatically. + + // Add a line program for the main function. + // For this example, we will only have one line in the line program. + let line_strings = &mut dwarf.line_strings; + let mut line_program = LineProgram::new( + encoding, + LineEncoding::default(), + LineString::new(comp_dir, encoding, line_strings), + LineString::new(file_name, encoding, line_strings), + None, + ); + let dir_id = line_program.default_directory(); + let file_string = LineString::new(file_name, encoding, line_strings); + let file_id = line_program.add_file(file_string, dir_id, None); + line_program.begin_sequence(Some(main_address)); + line_program.row().file = file_id; + line_program.row().line = 2; + line_program.generate_row(); + line_program.end_sequence(main_size); + dwarf.unit.line_program = line_program; + + // Add a subprogram DIE for the main function. + // Note that this example does not include all attributes. 
+ let subprogram = dwarf.unit.add(root, gimli::DW_TAG_subprogram); + let entry = dwarf.unit.get_mut(subprogram); + entry.set(gimli::DW_AT_external, AttributeValue::Flag(true)); + entry.set(gimli::DW_AT_name, AttributeValue::String(main_name.into())); + entry.set( + gimli::DW_AT_decl_file, + AttributeValue::FileIndex(Some(file_id)), + ); + entry.set(gimli::DW_AT_decl_line, AttributeValue::Udata(2)); + entry.set(gimli::DW_AT_decl_column, AttributeValue::Udata(5)); + entry.set(gimli::DW_AT_low_pc, AttributeValue::Address(main_address)); + entry.set(gimli::DW_AT_high_pc, AttributeValue::Udata(main_size)); + + // Build the DWARF sections. + // This will populate the sections with the DWARF data and relocations. + let mut sections = Sections::new(Section::new()); + dwarf.write(&mut sections)?; + + // Add the DWARF section data to the object file. + sections.for_each_mut(|id, section| -> object::write::Result<()> { + if section.data.len() == 0 { + return Ok(()); + } + let section_id = obj.add_section(Vec::new(), id.name().into(), object::SectionKind::Debug); + obj.set_section_data(section_id, section.data.take(), 1); + + // Record the section ID so that it can be used for relocations. + section.id = Some(section_id); + Ok(()) + })?; + + // Add the relocations to the object file. + sections.for_each(|_, section| -> object::write::Result<()> { + let Some(section_id) = section.id else { + debug_assert!(section.relocations.is_empty()); + return Ok(()); + }; + for reloc in &section.relocations { + // The `eh_pe` field is not used in this example because we are not writing + // unwind information. + debug_assert!(reloc.eh_pe.is_none()); + let symbol = match reloc.target { + RelocationTarget::Section(id) => { + obj.section_symbol(sections.get(id).unwrap().id.unwrap()) + } + RelocationTarget::Symbol(id) => { + // The main function is the only symbol we have defined. 
+ debug_assert_eq!(id, 0); + main_symbol + } + }; + obj.add_relocation( + section_id, + object::write::Relocation { + offset: reloc.offset as u64, + symbol, + addend: reloc.addend, + flags: object::RelocationFlags::Generic { + kind: object::RelocationKind::Absolute, + encoding: object::RelocationEncoding::Generic, + size: reloc.size * 8, + }, + }, + )?; + } + Ok(()) + })?; + + // Finally, write the object file. + let file = std::fs::File::create("hello.o")?; + obj.write_stream(file)?; + Ok(()) +} + +/// Define the data and symbol for the main function. +/// +/// This function is unrelated to gimli. It's a copy of the `simple_write` example +/// from the `object` crate. +fn define_main( + obj: &mut object::write::Object, +) -> Result<(object::write::SymbolId, u64), Box> { + // Add a file symbol (STT_FILE or equivalent). + obj.add_file_symbol((*b"hello.c").into()); + + // Generate code for the equivalent of this C function: + // int main() { + // puts("Hello, world!"); + // return 0; + // } + let mut main_data = Vec::new(); + // sub $0x28, %rsp + main_data.extend_from_slice(&[0x48, 0x83, 0xec, 0x28]); + // Handle different calling convention on Windows. 
+ if cfg!(target_os = "windows") { + // lea 0x0(%rip), %rcx + main_data.extend_from_slice(&[0x48, 0x8d, 0x0d, 0x00, 0x00, 0x00, 0x00]); + } else { + // lea 0x0(%rip), %rdi + main_data.extend_from_slice(&[0x48, 0x8d, 0x3d, 0x00, 0x00, 0x00, 0x00]); + } + // R_X86_64_PC32 .rodata-0x4 + let s_reloc_offset = main_data.len() - 4; + let s_reloc_addend = -4; + let s_reloc_flags = object::RelocationFlags::Generic { + kind: object::RelocationKind::Relative, + encoding: object::RelocationEncoding::Generic, + size: 32, + }; + // call 14 + main_data.extend_from_slice(&[0xe8, 0x00, 0x00, 0x00, 0x00]); + // R_X86_64_PLT32 puts-0x4 + let puts_reloc_offset = main_data.len() - 4; + let puts_reloc_addend = -4; + let puts_reloc_flags = object::RelocationFlags::Generic { + kind: object::RelocationKind::PltRelative, + encoding: object::RelocationEncoding::X86Branch, + size: 32, + }; + // xor %eax, %eax + main_data.extend_from_slice(&[0x31, 0xc0]); + // add $0x28, %rsp + main_data.extend_from_slice(&[0x48, 0x83, 0xc4, 0x28]); + // ret + main_data.extend_from_slice(&[0xc3]); + + // Add the main function in its own subsection (equivalent to -ffunction-sections). + let (main_section, main_offset) = + obj.add_subsection(object::write::StandardSection::Text, b"main", &main_data, 1); + // Add a globally visible symbol for the main function. + let main_size = main_data.len() as u64; + let main_symbol = obj.add_symbol(object::write::Symbol { + name: (*b"main").into(), + value: main_offset, + size: main_size, + kind: object::SymbolKind::Text, + scope: object::SymbolScope::Linkage, + weak: false, + section: object::write::SymbolSection::Section(main_section), + flags: object::SymbolFlags::None, + }); + + // Add a read only string constant for the puts argument. + // We don't create a symbol for the constant, but instead refer to it by + // the section symbol and section offset. 
+ let rodata_section = obj.section_id(object::write::StandardSection::ReadOnlyData); + let rodata_symbol = obj.section_symbol(rodata_section); + let s_offset = obj.append_section_data(rodata_section, b"Hello, world!\0", 1); + + // Relocation for the string constant. + obj.add_relocation( + main_section, + object::write::Relocation { + offset: s_reloc_offset as u64, + symbol: rodata_symbol, + addend: s_offset as i64 + s_reloc_addend, + flags: s_reloc_flags, + }, + )?; + + // External symbol for puts. + let puts_symbol = obj.add_symbol(object::write::Symbol { + name: (*b"puts").into(), + value: 0, + size: 0, + kind: object::SymbolKind::Text, + scope: object::SymbolScope::Dynamic, + weak: false, + section: object::write::SymbolSection::Undefined, + flags: object::SymbolFlags::None, + }); + + // Relocation for the call to puts. + obj.add_relocation( + main_section, + object::write::Relocation { + offset: puts_reloc_offset as u64, + symbol: puts_symbol, + addend: puts_reloc_addend, + flags: puts_reloc_flags, + }, + )?; + + Ok((main_symbol, main_size)) +} diff --git a/src/constants.rs b/src/constants.rs index b67e98ba..c193350f 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -25,7 +25,7 @@ #![allow(non_upper_case_globals)] #![allow(missing_docs)] -use core::fmt; +use core::{fmt, ops}; // The `dw!` macro turns this: // @@ -1342,6 +1342,14 @@ const DW_EH_PE_FORMAT_MASK: u8 = 0b0000_1111; // Ignores indirection bit. const DW_EH_PE_APPLICATION_MASK: u8 = 0b0111_0000; +impl ops::BitOr for DwEhPe { + type Output = DwEhPe; + + fn bitor(self, rhs: DwEhPe) -> DwEhPe { + DwEhPe(self.0 | rhs.0) + } +} + impl DwEhPe { /// Get the pointer encoding's format. 
#[inline] @@ -1397,13 +1405,13 @@ mod tests { #[test] fn test_dw_eh_pe_format() { - let encoding = DwEhPe(DW_EH_PE_pcrel.0 | DW_EH_PE_uleb128.0); + let encoding = DW_EH_PE_pcrel | DW_EH_PE_uleb128; assert_eq!(encoding.format(), DW_EH_PE_uleb128); } #[test] fn test_dw_eh_pe_application() { - let encoding = DwEhPe(DW_EH_PE_pcrel.0 | DW_EH_PE_uleb128.0); + let encoding = DW_EH_PE_pcrel | DW_EH_PE_uleb128; assert_eq!(encoding.application(), DW_EH_PE_pcrel); } @@ -1415,7 +1423,7 @@ mod tests { #[test] fn test_dw_eh_pe_is_valid_encoding_ok() { - let encoding = DwEhPe(DW_EH_PE_uleb128.0 | DW_EH_PE_pcrel.0); + let encoding = DW_EH_PE_uleb128 | DW_EH_PE_pcrel; assert!(encoding.is_valid_encoding()); assert!(DW_EH_PE_absptr.is_valid_encoding()); assert!(DW_EH_PE_omit.is_valid_encoding()); diff --git a/src/read/cfi.rs b/src/read/cfi.rs index d764219c..82cd8531 100644 --- a/src/read/cfi.rs +++ b/src/read/cfi.rs @@ -7168,9 +7168,8 @@ mod tests { cie.format = Format::Dwarf32; cie.version = 1; cie.augmentation = Some(Augmentation::default()); - cie.augmentation.as_mut().unwrap().lsda = Some(constants::DwEhPe( - constants::DW_EH_PE_funcrel.0 | constants::DW_EH_PE_absptr.0, - )); + cie.augmentation.as_mut().unwrap().lsda = + Some(constants::DW_EH_PE_funcrel | constants::DW_EH_PE_absptr); let mut fde = FrameDescriptionEntry { offset: 0, @@ -7369,8 +7368,7 @@ mod tests { #[test] fn test_parse_pointer_encoding_ok() { use crate::endianity::NativeEndian; - let expected = - constants::DwEhPe(constants::DW_EH_PE_uleb128.0 | constants::DW_EH_PE_pcrel.0); + let expected = constants::DW_EH_PE_uleb128 | constants::DW_EH_PE_pcrel; let input = [expected.0, 1, 2, 3, 4]; let input = &mut EndianSlice::new(&input, NativeEndian); assert_eq!(parse_pointer_encoding(input), Ok(expected)); @@ -7602,8 +7600,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_uleb128() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_uleb128.0); + let encoding = 
constants::DW_EH_PE_absptr | constants::DW_EH_PE_uleb128; let expected_rest = [1, 2, 3, 4]; let input = Section::with_endian(Endian::Little) @@ -7628,8 +7625,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_udata2() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_udata2.0); + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata2; let expected_rest = [1, 2, 3, 4]; let input = Section::with_endian(Endian::Little) @@ -7654,8 +7650,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_udata4() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_udata4.0); + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata4; let expected_rest = [1, 2, 3, 4]; let input = Section::with_endian(Endian::Little) @@ -7680,8 +7675,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_udata8() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_udata8.0); + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_udata8; let expected_rest = [1, 2, 3, 4]; let input = Section::with_endian(Endian::Little) @@ -7706,8 +7700,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_sleb128() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_textrel.0 | constants::DW_EH_PE_sleb128.0); + let encoding = constants::DW_EH_PE_textrel | constants::DW_EH_PE_sleb128; let expected_rest = [1, 2, 3, 4]; let input = Section::with_endian(Endian::Little) @@ -7732,8 +7725,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_sdata2() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_sdata2.0); + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata2; let expected_rest = [1, 2, 3, 4]; let expected = 0x111_i16; @@ -7759,8 +7751,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_sdata4() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_sdata4.0); 
+ let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata4; let expected_rest = [1, 2, 3, 4]; let expected = 0x111_1111_i32; @@ -7786,8 +7777,7 @@ mod tests { #[test] fn test_parse_encoded_pointer_sdata8() { - let encoding = - constants::DwEhPe(constants::DW_EH_PE_absptr.0 | constants::DW_EH_PE_sdata8.0); + let encoding = constants::DW_EH_PE_absptr | constants::DW_EH_PE_sdata8; let expected_rest = [1, 2, 3, 4]; let expected = -0x11_1111_1222_2222_i64; diff --git a/src/read/mod.rs b/src/read/mod.rs index 61ad9615..b6ae0230 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -206,6 +206,9 @@ pub use self::endian_reader::*; mod reader; pub use self::reader::*; +mod relocate; +pub use self::relocate::*; + #[cfg(feature = "read")] mod abbrev; #[cfg(feature = "read")] diff --git a/src/read/relocate.rs b/src/read/relocate.rs new file mode 100644 index 00000000..15bff2d6 --- /dev/null +++ b/src/read/relocate.rs @@ -0,0 +1,153 @@ +#[cfg(feature = "read")] +use alloc::borrow::Cow; +use core::fmt::Debug; + +use crate::common::Format; +use crate::read::{Reader, ReaderOffset, ReaderOffsetId, Result}; + +/// Trait for relocating addresses and offsets while reading a section. +pub trait Relocate { + /// Relocate an address which was read from the given section offset. + fn relocate_address(&self, offset: T, value: u64) -> Result; + + /// Relocate a value which was read from the given section offset. + fn relocate_offset(&self, offset: T, value: T) -> Result; +} + +/// A `Reader` which applies relocations to addresses and offsets. +/// +/// This is useful for reading sections which contain relocations, +/// such as those in a relocatable object file. +/// It is generally not used for reading sections in an executable file. 
+#[derive(Debug, Clone)]
+pub struct RelocateReader<R: Reader<Offset = usize>, T: Relocate<R::Offset>> {
+    section: R,
+    reader: R,
+    relocate: T,
+}
+
+impl<R, T> RelocateReader<R, T>
+where
+    R: Reader<Offset = usize>,
+    T: Relocate<R::Offset>,
+{
+    /// Create a new `RelocateReader` which applies relocations to the given section reader.
+    pub fn new(section: R, relocate: T) -> Self {
+        let reader = section.clone();
+        Self {
+            section,
+            reader,
+            relocate,
+        }
+    }
+}
+
+impl<R, T> Reader for RelocateReader<R, T>
+where
+    R: Reader<Offset = usize>,
+    T: Relocate<R::Offset> + Debug + Clone,
+{
+    type Endian = R::Endian;
+    type Offset = R::Offset;
+
+    fn read_address(&mut self, address_size: u8) -> Result<u64> {
+        let offset = self.reader.offset_from(&self.section); // position before the read
+        let value = self.reader.read_address(address_size)?;
+        self.relocate.relocate_address(offset, value)
+    }
+
+    fn read_offset(&mut self, format: Format) -> Result<R::Offset> {
+        let offset = self.reader.offset_from(&self.section); // position before the read
+        let value = self.reader.read_offset(format)?;
+        self.relocate.relocate_offset(offset, value)
+    }
+
+    fn read_sized_offset(&mut self, size: u8) -> Result<R::Offset> {
+        let offset = self.reader.offset_from(&self.section); // position before the read
+        let value = self.reader.read_sized_offset(size)?;
+        self.relocate.relocate_offset(offset, value)
+    }
+
+    #[inline]
+    fn split(&mut self, len: Self::Offset) -> Result<Self> {
+        let mut other = self.clone(); // shares `section`, so offsets stay section-relative
+        other.reader.truncate(len)?;
+        self.reader.skip(len)?;
+        Ok(other)
+    }
+
+    // All remaining methods simply delegate to `self.reader`.
+
+    #[inline]
+    fn endian(&self) -> Self::Endian {
+        self.reader.endian()
+    }
+
+    #[inline]
+    fn len(&self) -> Self::Offset {
+        self.reader.len()
+    }
+
+    #[inline]
+    fn empty(&mut self) {
+        self.reader.empty()
+    }
+
+    #[inline]
+    fn truncate(&mut self, len: Self::Offset) -> Result<()> {
+        self.reader.truncate(len)
+    }
+
+    #[inline]
+    fn offset_from(&self, base: &Self) -> Self::Offset {
+        self.reader.offset_from(&base.reader)
+    }
+
+    #[inline]
+    fn offset_id(&self) -> ReaderOffsetId {
+        self.reader.offset_id()
+    }
+
+    #[inline]
+    fn lookup_offset_id(&self, id: ReaderOffsetId) -> Option<Self::Offset> {
+        self.reader.lookup_offset_id(id)
+    }
+
+    #[inline]
+    fn find(&self, byte: u8) -> Result<Self::Offset> {
+        self.reader.find(byte)
+    }
+
+    #[inline]
+    fn skip(&mut self, len: Self::Offset) -> Result<()> {
+        self.reader.skip(len)
+    }
+
+    #[cfg(not(feature = "read"))]
+    fn cannot_implement() -> super::reader::seal_if_no_alloc::Sealed {
+        super::reader::seal_if_no_alloc::Sealed
+    }
+
+    #[cfg(feature = "read")]
+    #[inline]
+    fn to_slice(&self) -> Result<Cow<'_, [u8]>> {
+        self.reader.to_slice()
+    }
+
+    #[cfg(feature = "read")]
+    #[inline]
+    fn to_string(&self) -> Result<Cow<'_, str>> {
+        self.reader.to_string()
+    }
+
+    #[cfg(feature = "read")]
+    #[inline]
+    fn to_string_lossy(&self) -> Result<Cow<'_, str>> {
+        self.reader.to_string_lossy()
+    }
+
+    #[inline]
+    fn read_slice(&mut self, buf: &mut [u8]) -> Result<()> {
+        self.reader.read_slice(buf)
+    }
+}
diff --git a/src/write/cfi.rs b/src/write/cfi.rs
index ef7af001..d1339615 100644
--- a/src/write/cfi.rs
+++ b/src/write/cfi.rs
@@ -913,16 +913,12 @@ mod tests {
                     frames.add_fde(cie2_id, fde4.clone());
 
                     let mut cie3 = CommonInformationEntry::new(encoding, 1, 8, X86_64::RA);
-                    cie3.fde_address_encoding = constants::DwEhPe(
-                        constants::DW_EH_PE_pcrel.0 | constants::DW_EH_PE_sdata4.0,
-                    );
-                    cie3.lsda_encoding = Some(constants::DwEhPe(
-                        constants::DW_EH_PE_pcrel.0 | constants::DW_EH_PE_sdata4.0,
-                    ));
+                    cie3.fde_address_encoding =
+                        constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4;
+                    cie3.lsda_encoding =
+                        Some(constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4);
                     cie3.personality = Some((
-                        constants::DwEhPe(
-                            constants::DW_EH_PE_pcrel.0 | constants::DW_EH_PE_sdata4.0,
-                        ),
+                        constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4,
                         Address::Constant(0x1235),
                     ));
                     cie3.signal_trampoline = true;
diff --git a/src/write/mod.rs b/src/write/mod.rs
index 47ba6319..9c891a05 100644
--- a/src/write/mod.rs
+++ b/src/write/mod.rs
@@ -70,6 +70,9 @@ pub use self::endian_vec::*;
 mod writer;
 pub use self::writer::*;
 
+mod relocate;
+pub use self::relocate::*;
+
 #[macro_use]
 mod section;
 pub use self::section::*;
diff --git a/src/write/relocate.rs b/src/write/relocate.rs
new file mode 100644
index 00000000..ff8dde13
--- /dev/null
+++ b/src/write/relocate.rs
@@ -0,0 +1,280 @@
+use crate::constants;
+use crate::write::{Address, Error, Result, Writer};
+use crate::SectionId;
+
+/// A relocation to be applied to a section.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct Relocation {
+    /// The offset within the section where the relocation should be applied.
+    pub offset: usize,
+    /// The size of the value to be relocated.
+    pub size: u8,
+    /// The target of the relocation.
+    pub target: RelocationTarget,
+    /// The addend to be applied to the relocated value.
+    pub addend: i64,
+    /// The pointer encoding for relocations in unwind information.
+    pub eh_pe: Option<constants::DwEhPe>,
+}
+
+/// The target of a relocation.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum RelocationTarget {
+    /// The relocation target is a symbol.
+    ///
+    /// The meaning of this value is decided by the writer, but
+    /// will typically be an index into a symbol table.
+    Symbol(usize),
+    /// The relocation target is a section.
+    Section(SectionId),
+}
+
+/// A `Writer` which also records relocations.
+pub trait RelocateWriter {
+    /// The type of the writer being used to write the section data.
+    type Writer: Writer;
+
+    /// Get the writer being used to write the section data.
+    fn writer(&self) -> &Self::Writer;
+
+    /// Get a mutable reference to the writer being used to write the section data.
+    fn writer_mut(&mut self) -> &mut Self::Writer;
+
+    /// Record a relocation.
+    fn relocate(&mut self, relocation: Relocation);
+}
+
+impl<T: RelocateWriter> Writer for T {
+    type Endian = <<T as RelocateWriter>::Writer as Writer>::Endian;
+
+    fn endian(&self) -> Self::Endian {
+        self.writer().endian()
+    }
+
+    fn len(&self) -> usize {
+        self.writer().len()
+    }
+
+    fn write(&mut self, bytes: &[u8]) -> Result<()> {
+        self.writer_mut().write(bytes)
+    }
+
+    fn write_at(&mut self, offset: usize, bytes: &[u8]) -> Result<()> {
+        self.writer_mut().write_at(offset, bytes)
+    }
+
+    fn write_address(&mut self, address: Address, size: u8) -> Result<()> {
+        match address {
+            Address::Constant(val) => self.writer_mut().write_udata(val, size),
+            Address::Symbol { symbol, addend } => {
+                self.relocate(Relocation {
+                    offset: self.len(),
+                    size,
+                    target: RelocationTarget::Symbol(symbol),
+                    addend,
+                    eh_pe: None,
+                });
+                self.writer_mut().write_udata(0, size) // zero placeholder for the relocated value
+            }
+        }
+    }
+
+    fn write_offset(&mut self, val: usize, section: SectionId, size: u8) -> Result<()> {
+        self.relocate(Relocation {
+            offset: self.len(),
+            size,
+            target: RelocationTarget::Section(section),
+            addend: val as i64,
+            eh_pe: None,
+        });
+        self.writer_mut().write_udata(0, size)
+    }
+
+    fn write_offset_at(
+        &mut self,
+        offset: usize,
+        val: usize,
+        section: SectionId,
+        size: u8,
+    ) -> Result<()> {
+        self.relocate(Relocation {
+            offset,
+            size,
+            target: RelocationTarget::Section(section),
+            addend: val as i64,
+            eh_pe: None,
+        });
+        self.writer_mut().write_udata_at(offset, 0, size)
+    }
+
+    fn write_eh_pointer(
+        &mut self,
+        address: Address,
+        eh_pe: constants::DwEhPe,
+        size: u8,
+    ) -> Result<()> {
+        match address {
+            Address::Constant(_) => self.writer_mut().write_eh_pointer(address, eh_pe, size),
+            Address::Symbol { symbol, addend } => {
+                let size = match eh_pe.format() {
+                    constants::DW_EH_PE_absptr => size, // keeps the caller-supplied size
+                    constants::DW_EH_PE_udata2 => 2,
+                    constants::DW_EH_PE_udata4 => 4,
+                    constants::DW_EH_PE_udata8 => 8,
+                    constants::DW_EH_PE_sdata2 => 2,
+                    constants::DW_EH_PE_sdata4 => 4,
+                    constants::DW_EH_PE_sdata8 => 8,
+                    _ => return Err(Error::UnsupportedPointerEncoding(eh_pe)),
+                };
+                self.relocate(Relocation {
+                    offset: self.len(),
+                    size,
+                    target: RelocationTarget::Symbol(symbol),
+                    addend,
+                    eh_pe: Some(eh_pe),
+                });
+                self.writer_mut().write_udata(0, size)
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::write::EndianVec;
+    use crate::{LittleEndian, SectionId};
+    use alloc::vec::Vec;
+
+    struct Section {
+        writer: EndianVec<LittleEndian>,
+        relocations: Vec<Relocation>,
+    }
+
+    impl RelocateWriter for Section {
+        type Writer = EndianVec<LittleEndian>;
+
+        fn writer(&self) -> &Self::Writer {
+            &self.writer
+        }
+
+        fn writer_mut(&mut self) -> &mut Self::Writer {
+            &mut self.writer
+        }
+
+        fn relocate(&mut self, relocation: Relocation) {
+            self.relocations.push(relocation);
+        }
+    }
+
+    #[test]
+    fn test_relocate_writer() {
+        let mut expected_data = Vec::new();
+        let mut expected_relocations = Vec::new();
+
+        let mut section = Section {
+            writer: EndianVec::new(LittleEndian),
+            relocations: Vec::new(),
+        };
+
+        // No relocation for plain data.
+        section.write_udata(0x12345678, 4).unwrap();
+        expected_data.extend_from_slice(&0x12345678u32.to_le_bytes());
+
+        // No relocation for a constant address.
+        section
+            .write_address(Address::Constant(0x87654321), 4)
+            .unwrap();
+        expected_data.extend_from_slice(&0x87654321u32.to_le_bytes());
+
+        // Relocation for a symbol address.
+        let offset = section.len();
+        section
+            .write_address(
+                Address::Symbol {
+                    symbol: 1,
+                    addend: 0x12345678,
+                },
+                4,
+            )
+            .unwrap();
+        expected_data.extend_from_slice(&[0; 4]);
+        expected_relocations.push(Relocation {
+            offset,
+            size: 4,
+            target: RelocationTarget::Symbol(1),
+            addend: 0x12345678,
+            eh_pe: None,
+        });
+
+        // Relocation for a section offset.
+        let offset = section.len();
+        section
+            .write_offset(0x12345678, SectionId::DebugAbbrev, 4)
+            .unwrap();
+        expected_data.extend_from_slice(&[0; 4]);
+        expected_relocations.push(Relocation {
+            offset,
+            size: 4,
+            target: RelocationTarget::Section(SectionId::DebugAbbrev),
+            addend: 0x12345678,
+            eh_pe: None,
+        });
+
+        // Relocation for a section offset at a specific offset.
+        let offset = section.len();
+        section.write_udata(0x12345678, 4).unwrap();
+        section
+            .write_offset_at(offset, 0x12345678, SectionId::DebugStr, 4)
+            .unwrap();
+        expected_data.extend_from_slice(&[0; 4]);
+        expected_relocations.push(Relocation {
+            offset,
+            size: 4,
+            target: RelocationTarget::Section(SectionId::DebugStr),
+            addend: 0x12345678,
+            eh_pe: None,
+        });
+
+        // No relocation for a constant in unwind information.
+        section
+            .write_eh_pointer(Address::Constant(0x87654321), constants::DW_EH_PE_absptr, 8)
+            .unwrap();
+        expected_data.extend_from_slice(&0x87654321u64.to_le_bytes());
+
+        // No relocation for a relative constant in unwind information.
+        let offset = section.len();
+        section
+            .write_eh_pointer(
+                Address::Constant(offset as u64 - 8),
+                constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4,
+                8,
+            )
+            .unwrap();
+        expected_data.extend_from_slice(&(-8i32).to_le_bytes());
+
+        // Relocation for a symbol in unwind information.
+        let offset = section.len();
+        section
+            .write_eh_pointer(
+                Address::Symbol {
+                    symbol: 2,
+                    addend: 0x12345678,
+                },
+                constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4,
+                8,
+            )
+            .unwrap();
+        expected_data.extend_from_slice(&[0; 4]);
+        expected_relocations.push(Relocation {
+            offset,
+            size: 4,
+            target: RelocationTarget::Symbol(2),
+            addend: 0x12345678,
+            eh_pe: Some(constants::DW_EH_PE_pcrel | constants::DW_EH_PE_sdata4),
+        });
+
+        assert_eq!(section.writer.into_vec(), expected_data);
+        assert_eq!(section.relocations, expected_relocations);
+    }
+}
diff --git a/src/write/section.rs b/src/write/section.rs
index 3d3a0e1d..22a44345 100644
--- a/src/write/section.rs
+++ b/src/write/section.rs
@@ -120,6 +120,42 @@ impl<W: Writer + Clone> Sections<W> {
 }
 
 impl<W: Writer> Sections<W> {
+    /// Get the section with the given `id`, or `None` if `id` is not a section stored here.
+    pub fn get(&self, id: SectionId) -> Option<&W> {
+        match id {
+            SectionId::DebugAbbrev => Some(&self.debug_abbrev.0),
+            SectionId::DebugInfo => Some(&self.debug_info.0),
+            SectionId::DebugLine => Some(&self.debug_line.0),
+            SectionId::DebugLineStr => Some(&self.debug_line_str.0),
+            SectionId::DebugRanges => Some(&self.debug_ranges.0),
+            SectionId::DebugRngLists => Some(&self.debug_rnglists.0),
+            SectionId::DebugLoc => Some(&self.debug_loc.0),
+            SectionId::DebugLocLists => Some(&self.debug_loclists.0),
+            SectionId::DebugStr => Some(&self.debug_str.0),
+            SectionId::DebugFrame => Some(&self.debug_frame.0),
+            SectionId::EhFrame => Some(&self.eh_frame.0),
+            _ => None,
+        }
+    }
+
+    /// Get a mutable reference to the section with the given `id`, or `None` if not stored here.
+    pub fn get_mut(&mut self, id: SectionId) -> Option<&mut W> {
+        match id {
+            SectionId::DebugAbbrev => Some(&mut self.debug_abbrev.0),
+            SectionId::DebugInfo => Some(&mut self.debug_info.0),
+            SectionId::DebugLine => Some(&mut self.debug_line.0),
+            SectionId::DebugLineStr => Some(&mut self.debug_line_str.0),
+            SectionId::DebugRanges => Some(&mut self.debug_ranges.0),
+            SectionId::DebugRngLists => Some(&mut self.debug_rnglists.0),
+            SectionId::DebugLoc => Some(&mut self.debug_loc.0),
+            SectionId::DebugLocLists => Some(&mut self.debug_loclists.0),
+            SectionId::DebugStr => Some(&mut self.debug_str.0),
+            SectionId::DebugFrame => Some(&mut self.debug_frame.0),
+            SectionId::EhFrame => Some(&mut self.eh_frame.0),
+            _ => None,
+        }
+    }
+
+    /// For each section, call `f` once with a shared reference.
     pub fn for_each<'a, F, E>(&'a self, mut f: F) -> result::Result<(), E>
     where