diff --git a/Cargo.toml b/Cargo.toml index 80f3b9fce..dfd334502 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ arrayvec = { version = "0.4.6", default-features = false } byteorder = { version = "1.0", default-features = false } fallible-iterator = { version = "0.2.0", default-features = false } indexmap = { version = "1.0.2", optional = true } +smallvec = { version = "0.6.10", default-features = false } stable_deref_trait = { version = "1.1.0", default-features = false } [dev-dependencies] @@ -41,3 +42,7 @@ write = ["std", "indexmap"] std = ["fallible-iterator/std", "stable_deref_trait/std"] alloc = ["fallible-iterator/alloc", "stable_deref_trait/alloc"] default = ["read", "write", "std"] + +[profile.bench] +debug = true +codegen-units = 1 diff --git a/src/constants.rs b/src/constants.rs index 00e6f9993..bdff87163 100644 --- a/src/constants.rs +++ b/src/constants.rs @@ -163,7 +163,7 @@ dw!( /// The tag encodings for DIE attributes. /// /// See Section 7.5.3, Table 7.3. -DwTag(u64) { +DwTag(u16) { DW_TAG_null = 0x00, DW_TAG_array_type = 0x01, @@ -308,7 +308,7 @@ dw!( /// The attribute encodings for DIE attributes. /// /// See Section 7.5.4, Table 7.5. -DwAt(u64) { +DwAt(u16) { DW_AT_null = 0x00, DW_AT_sibling = 0x01, @@ -617,7 +617,7 @@ dw!( /// The attribute form encodings for DIE attributes. /// /// See Section 7.5.6, Table 7.6. -DwForm(u64) { +DwForm(u16) { DW_FORM_null = 0x00, DW_FORM_addr = 0x01, diff --git a/src/leb128.rs b/src/leb128.rs index aef21eb25..6a2f454fd 100644 --- a/src/leb128.rs +++ b/src/leb128.rs @@ -87,6 +87,29 @@ pub mod read { } } + /// Read an LEB128 u16 from the given `Reader` and + /// return it or an error if reading failed. + pub fn u16(r: &mut R) -> Result { + let byte = r.read_u8()?; + let mut result = u16::from(low_bits_of_byte(byte)); + if byte & CONTINUATION_BIT == 0 { + return Ok(result); + } + + let byte = r.read_u8()?; + result |= u16::from(low_bits_of_byte(byte)) << 7; + if byte & CONTINUATION_BIT == 0 { + return Ok(result); + } + + let byte = r.read_u8()?; + if byte > 0x03 { + return Err(Error::BadUnsignedLeb128); + } + result += u16::from(byte) << 14; + Ok(result) + } + /// Read a signed LEB128 number from the given `Reader` and /// return it or an error if reading failed. pub fn signed(r: &mut R) -> Result { @@ -516,4 +539,36 @@ mod tests { 1u64 ); } + + #[test] + fn test_read_u16() { + for (buf, val) in [ + (&[2][..], 2), + (&[0x7f][..], 0x7f), + (&[0x80, 1][..], 0x80), + (&[0x81, 1][..], 0x81), + (&[0x82, 1][..], 0x82), + (&[0xff, 0x7f][..], 0x3fff), + (&[0x80, 0x80, 1][..], 0x4000), + (&[0xff, 0xff, 1][..], 0x7fff), + (&[0xff, 0xff, 3][..], 0xffff), + ] + .iter() + { + let mut readable = EndianSlice::new(buf, NativeEndian); + assert_eq!(*val, read::u16(&mut readable).expect("Should read number")); + } + + for buf in [ + &[0x80][..], + &[0x80, 0x80][..], + &[0x80, 0x80, 4][..], + &[0x80, 0x80, 0x80, 3][..], + ] + .iter() + { + let mut readable = EndianSlice::new(buf, NativeEndian); + assert!(read::u16(&mut readable).is_err(), format!("{:?}", buf)); + } + } } diff --git a/src/read/abbrev.rs b/src/read/abbrev.rs index ef75f6934..16b258f9b 100644 --- a/src/read/abbrev.rs +++ b/src/read/abbrev.rs @@ -2,6 +2,7 @@ use crate::collections::btree_map; use crate::vec::Vec; +use smallvec::SmallVec; use crate::common::{DebugAbbrevOffset, SectionId}; use crate::constants; @@ -173,6 +174,9 @@ impl Abbreviations { } } +// Length of 5 based on benchmark results for both x86-64 and i686. +type Attributes = SmallVec<[AttributeSpecification; 5]>; + /// An abbreviation describes the shape of a `DebuggingInformationEntry`'s type: /// its code, tag type, whether it has children, and its set of attributes. #[derive(Debug, Clone, PartialEq, Eq)] @@ -180,7 +184,7 @@ pub struct Abbreviation { code: u64, tag: constants::DwTag, has_children: constants::DwChildren, - attributes: Vec, + attributes: Attributes, } impl Abbreviation { @@ -193,7 +197,7 @@ impl Abbreviation { code: u64, tag: constants::DwTag, has_children: constants::DwChildren, - attributes: Vec, + attributes: Attributes, ) -> Abbreviation { assert_ne!(code, 0); Abbreviation { @@ -230,7 +234,7 @@ impl Abbreviation { /// Parse an abbreviation's tag. fn parse_tag(input: &mut R) -> Result { - let val = input.read_uleb128()?; + let val = input.read_uleb128_u16()?; if val == 0 { Err(Error::AbbreviationTagZero) } else { @@ -251,8 +255,8 @@ impl Abbreviation { /// Parse a series of attribute specifications, terminated by a null attribute /// specification. - fn parse_attributes(input: &mut R) -> Result> { - let mut attrs = Vec::new(); + fn parse_attributes(input: &mut R) -> Result { + let mut attrs = SmallVec::new(); while let Some(attr) = AttributeSpecification::parse(input)? { attrs.push(attr); @@ -373,7 +377,7 @@ impl AttributeSpecification { /// Parse an attribute's form. fn parse_form(input: &mut R) -> Result { - let val = input.read_uleb128()?; + let val = input.read_uleb128_u16()?; if val == 0 { Err(Error::AttributeFormZero) } else { @@ -384,10 +388,10 @@ impl AttributeSpecification { /// Parse an attribute specification. Returns `None` for the null attribute /// specification, `Some` for an actual attribute specification. fn parse(input: &mut R) -> Result> { - let name = input.read_uleb128()?; + let name = input.read_uleb128_u16()?; if name == 0 { // Parse the null attribute specification. - let form = input.read_uleb128()?; + let form = input.read_uleb128_u16()?; return if form == 0 { Ok(None) } else { @@ -414,6 +418,7 @@ pub mod tests { use crate::endianity::LittleEndian; use crate::read::{EndianSlice, Error}; use crate::test_util::GimliSectionMethods; + use smallvec::smallvec; #[cfg(target_pointer_width = "32")] use std::u32; use test_assembler::Section; @@ -428,7 +433,7 @@ pub mod tests { impl AbbrevSectionMethods for Section { fn abbrev(self, code: u64, tag: constants::DwTag, children: constants::DwChildren) -> Self { - self.uleb(code).uleb(tag.0).D8(children.0) + self.uleb(code).uleb(tag.0.into()).D8(children.0) } fn abbrev_null(self) -> Self { @@ -436,12 +441,12 @@ pub mod tests { } fn abbrev_attr(self, name: constants::DwAt, form: constants::DwForm) -> Self { - self.uleb(name.0).uleb(form.0) + self.uleb(name.0.into()).uleb(form.0.into()) } fn abbrev_attr_implicit_const(self, name: constants::DwAt, value: i64) -> Self { - self.uleb(name.0) - .uleb(constants::DW_FORM_implicit_const.0) + self.uleb(name.0.into()) + .uleb(constants::DW_FORM_implicit_const.0.into()) .sleb(value) } @@ -473,7 +478,7 @@ pub mod tests { 1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes, - vec![ + smallvec![ AttributeSpecification::new( constants::DW_AT_producer, constants::DW_FORM_strp, @@ -491,7 +496,7 @@ pub mod tests { 2, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no, - vec![AttributeSpecification::new( + smallvec![AttributeSpecification::new( constants::DW_AT_name, constants::DW_FORM_string, None, @@ -509,17 +514,17 @@ pub mod tests { #[test] fn test_abbreviations_insert() { - fn abbrev(code: u64) -> Abbreviation { + fn abbrev(code: u16) -> Abbreviation { Abbreviation::new( - code, + code.into(), constants::DwTag(code), constants::DW_CHILDREN_no, - vec![], + smallvec![], ) } - fn assert_abbrev(abbrevs: &Abbreviations, code: u64) { - let abbrev = abbrevs.get(code).unwrap(); + fn assert_abbrev(abbrevs: &Abbreviations, code: u16) { + let abbrev = abbrevs.get(code.into()).unwrap(); assert_eq!(abbrev.tag(), constants::DwTag(code)); } @@ -579,15 +584,15 @@ pub mod tests { fn abbrev(code: u64) -> Abbreviation { Abbreviation::new( code, - constants::DwTag(code), + constants::DwTag(code as u16), constants::DW_CHILDREN_no, - vec![], + smallvec![], ) } fn assert_abbrev(abbrevs: &Abbreviations, code: u64) { let abbrev = abbrevs.get(code).unwrap(); - assert_eq!(abbrev.tag(), constants::DwTag(code)); + assert_eq!(abbrev.tag(), constants::DwTag(code as u16)); } let mut abbrevs = Abbreviations::empty(); @@ -624,7 +629,7 @@ pub mod tests { 1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes, - vec![ + smallvec![ AttributeSpecification::new( constants::DW_AT_producer, constants::DW_FORM_strp, @@ -642,7 +647,7 @@ pub mod tests { 2, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no, - vec![AttributeSpecification::new( + smallvec![AttributeSpecification::new( constants::DW_AT_name, constants::DW_FORM_string, None, @@ -728,7 +733,7 @@ pub mod tests { 1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no, - vec![AttributeSpecification::new( + smallvec![AttributeSpecification::new( constants::DW_AT_name, constants::DW_FORM_string, None, @@ -756,7 +761,7 @@ pub mod tests { 1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no, - vec![AttributeSpecification::new( + smallvec![AttributeSpecification::new( constants::DW_AT_name, constants::DW_FORM_implicit_const, Some(-42), diff --git a/src/read/line.rs b/src/read/line.rs index 267e17bcc..65c5b0a6d 100644 --- a/src/read/line.rs +++ b/src/read/line.rs @@ -1695,7 +1695,7 @@ impl FileEntryFormat { path_count += 1; } - let form = constants::DwForm(input.read_uleb128()?); + let form = constants::DwForm(input.read_uleb128_u16()?); format.push(FileEntryFormat { content_type, form }); } diff --git a/src/read/reader.rs b/src/read/reader.rs index 4694caed1..dc281f724 100644 --- a/src/read/reader.rs +++ b/src/read/reader.rs @@ -391,6 +391,11 @@ pub trait Reader: Debug + Clone { leb128::read::unsigned(self) } + /// Read an unsigned LEB128 encoded u16. + fn read_uleb128_u16(&mut self) -> Result { + leb128::read::u16(self) + } + /// Read a signed LEB128 encoded integer. fn read_sleb128(&mut self) -> Result { leb128::read::signed(self) diff --git a/src/read/unit.rs b/src/read/unit.rs index d64abf31e..a31463ad1 100644 --- a/src/read/unit.rs +++ b/src/read/unit.rs @@ -1862,7 +1862,7 @@ pub(crate) fn parse_attribute<'unit, 'abbrev, R: Reader>( loop { let value = match form { constants::DW_FORM_indirect => { - let dynamic_form = input.read_uleb128()?; + let dynamic_form = input.read_uleb128_u16()?; form = constants::DwForm(dynamic_form); continue; } @@ -3128,6 +3128,7 @@ mod tests { }; use crate::test_util::GimliSectionMethods; use crate::vec::Vec; + use smallvec::smallvec; use std; use std::cell::Cell; use test_assembler::{Endian, Label, LabelMaker, Section}; @@ -4416,7 +4417,7 @@ mod tests { let bytes_written = { let mut writable = &mut buf[..]; - leb128::write::unsigned(&mut writable, constants::DW_FORM_udata.0) + leb128::write::unsigned(&mut writable, constants::DW_FORM_udata.0.into()) .expect("should write udata") + leb128::write::unsigned(&mut writable, 9_999_999).expect("should write value") }; @@ -4445,7 +4446,7 @@ mod tests { 42, constants::DW_TAG_subprogram, constants::DW_CHILDREN_yes, - vec![ + smallvec![ AttributeSpecification::new(constants::DW_AT_name, constants::DW_FORM_string, None), AttributeSpecification::new(constants::DW_AT_low_pc, constants::DW_FORM_addr, None), AttributeSpecification::new( @@ -4556,7 +4557,7 @@ mod tests { 42, constants::DW_TAG_subprogram, constants::DW_CHILDREN_yes, - vec![ + smallvec![ AttributeSpecification::new(constants::DW_AT_name, constants::DW_FORM_string, None), AttributeSpecification::new(constants::DW_AT_low_pc, constants::DW_FORM_addr, None), AttributeSpecification::new( diff --git a/src/read/value.rs b/src/read/value.rs index b1fc5d0aa..e64c29100 100644 --- a/src/read/value.rs +++ b/src/read/value.rs @@ -912,6 +912,7 @@ mod tests { Abbreviation, AttributeSpecification, DebuggingInformationEntry, EndianSlice, UnitHeader, UnitOffset, }; + use smallvec::smallvec; #[test] #[rustfmt::skip] @@ -932,7 +933,7 @@ mod tests { 42, constants::DW_TAG_base_type, constants::DW_CHILDREN_no, - vec![ + smallvec![ AttributeSpecification::new( constants::DW_AT_byte_size, constants::DW_FORM_udata, diff --git a/src/write/abbrev.rs b/src/write/abbrev.rs index 88c5e33d6..bf4faea4f 100644 --- a/src/write/abbrev.rs +++ b/src/write/abbrev.rs @@ -61,7 +61,7 @@ impl Abbreviation { /// Write the abbreviation to the `.debug_abbrev` section. pub fn write(&self, w: &mut DebugAbbrev) -> Result<()> { - w.write_uleb128(self.tag.0)?; + w.write_uleb128(self.tag.0.into())?; w.write_u8(if self.has_children { constants::DW_CHILDREN_yes.0 } else { @@ -94,8 +94,8 @@ impl AttributeSpecification { /// Write the attribute specification to the `.debug_abbrev` section. #[inline] pub fn write(&self, w: &mut DebugAbbrev) -> Result<()> { - w.write_uleb128(self.name.0)?; - w.write_uleb128(self.form.0) + w.write_uleb128(self.name.0.into())?; + w.write_uleb128(self.form.0.into()) } } diff --git a/src/write/line.rs b/src/write/line.rs index 64397e9fe..74ef28a8e 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -574,7 +574,7 @@ impl LineProgram { w.write_u8(1)?; w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; let dir_form = self.directories.get_index(0).unwrap().form(); - w.write_uleb128(dir_form.0)?; + w.write_uleb128(dir_form.0.into())?; // Directory entries. w.write_uleb128(self.directories.len() as u64)?; @@ -596,20 +596,20 @@ impl LineProgram { w.write_u8(count)?; w.write_uleb128(u64::from(constants::DW_LNCT_path.0))?; let file_form = self.comp_file.0.form(); - w.write_uleb128(file_form.0)?; + w.write_uleb128(file_form.0.into())?; w.write_uleb128(u64::from(constants::DW_LNCT_directory_index.0))?; - w.write_uleb128(constants::DW_FORM_udata.0)?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; if self.file_has_timestamp { w.write_uleb128(u64::from(constants::DW_LNCT_timestamp.0))?; - w.write_uleb128(constants::DW_FORM_udata.0)?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; } if self.file_has_size { w.write_uleb128(u64::from(constants::DW_LNCT_size.0))?; - w.write_uleb128(constants::DW_FORM_udata.0)?; + w.write_uleb128(constants::DW_FORM_udata.0.into())?; } if self.file_has_md5 { w.write_uleb128(u64::from(constants::DW_LNCT_MD5.0))?; - w.write_uleb128(constants::DW_FORM_data16.0)?; + w.write_uleb128(constants::DW_FORM_data16.0.into())?; } // File name entries.