From 5f88ccc18a1e76416d036d6f23c820169f92e5c3 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Tue, 12 Sep 2023 14:55:59 +1000 Subject: [PATCH 1/2] read: add Dwarf::set_abbreviations_cache_strategy --- crates/examples/src/bin/dwarfdump.rs | 2 + src/read/abbrev.rs | 88 ++++++++++++++++++++++++---- src/read/dwarf.rs | 20 ++++--- 3 files changed, 93 insertions(+), 17 deletions(-) diff --git a/crates/examples/src/bin/dwarfdump.rs b/crates/examples/src/bin/dwarfdump.rs index 84e10e04..eaf5039e 100644 --- a/crates/examples/src/bin/dwarfdump.rs +++ b/crates/examples/src/bin/dwarfdump.rs @@ -695,6 +695,8 @@ where dwarf.load_sup(&mut load_sup_section)?; } + dwarf.set_abbreviations_cache_strategy(gimli::AbbreviationsCacheStrategy::All); + if flags.eh_frame { let eh_frame = gimli::EhFrame::load(&mut load_section).unwrap(); dump_eh_frame(w, file, eh_frame)?; diff --git a/src/read/abbrev.rs b/src/read/abbrev.rs index 54f5cf8e..dd83c8d4 100644 --- a/src/read/abbrev.rs +++ b/src/read/abbrev.rs @@ -12,7 +12,9 @@ use crate::common::{DebugAbbrevOffset, Encoding, SectionId}; use crate::constants; use crate::endianity::Endianity; use crate::read::lazy::LazyArc; -use crate::read::{EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader}; +use crate::read::{ + DebugInfoUnitHeadersIter, EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader, +}; /// The `DebugAbbrev` struct represents the abbreviations describing /// `DebuggingInformationEntry`s' attribute names and forms found in the @@ -102,14 +104,36 @@ impl From for DebugAbbrev { } } +/// The strategy to use for caching abbreviations. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[non_exhaustive] +pub enum AbbreviationsCacheStrategy { + /// Don't cache anything. + None, + /// Cache the abbreviations at offset 0. + Zero, + /// Cache abbreviations that are used more than once. + Duplicates, + /// Cache all abbreviations. + All, +} + /// A cache of previously parsed `Abbreviations`. /// -/// Currently this only caches the abbreviations for offset 0, -/// since this is a common case in which abbreviations are reused. -/// This strategy may change in future if there is sufficient need. -#[derive(Debug, Default)] +/// This defaults to only caching the abbreviations for offset 0, +/// since this is a common case in which abbreviations are reused, +/// and doesn't require additional parsing. +#[derive(Debug)] pub struct AbbreviationsCache { - abbreviations: LazyArc, + abbreviations: btree_map::BTreeMap>, +} + +impl Default for AbbreviationsCache { + fn default() -> Self { + let mut abbreviations = btree_map::BTreeMap::new(); + abbreviations.insert(0, LazyArc::default()); + AbbreviationsCache { abbreviations } + } } impl AbbreviationsCache { @@ -118,6 +142,51 @@ impl AbbreviationsCache { Self::default() } + /// Set the strategy for the abbreviations cache. + /// + /// This discards any existing cache entries. + pub fn set_strategy( + &mut self, + strategy: AbbreviationsCacheStrategy, + mut units: DebugInfoUnitHeadersIter, + ) { + let mut offsets = Vec::new(); + match strategy { + AbbreviationsCacheStrategy::None => {} + AbbreviationsCacheStrategy::Zero => { + offsets.push(0); + } + AbbreviationsCacheStrategy::Duplicates => { + while let Ok(Some(unit)) = units.next() { + offsets.push(unit.debug_abbrev_offset().0.into_u64()); + } + offsets.sort_unstable(); + let mut prev_offset = 0; + let mut count = 0; + offsets.retain(|offset| { + if count == 0 || prev_offset != *offset { + prev_offset = *offset; + count = 1; + } else { + count += 1; + } + count == 2 + }); + } + AbbreviationsCacheStrategy::All => { + while let Ok(Some(unit)) = units.next() { + offsets.push(unit.debug_abbrev_offset().0.into_u64()); + } + offsets.sort_unstable(); + offsets.dedup(); + } + } + self.abbreviations = offsets + .into_iter() + .map(|offset| (offset, LazyArc::default())) + .collect(); + } + /// Parse the abbreviations at the given offset. /// /// This uses or updates the cache as required. @@ -126,11 +195,10 @@ impl AbbreviationsCache { debug_abbrev: &DebugAbbrev, offset: DebugAbbrevOffset, ) -> Result> { - if offset.0 != R::Offset::from_u8(0) { - return debug_abbrev.abbreviations(offset).map(Arc::new); + match self.abbreviations.get(&offset.0.into_u64()) { + Some(entry) => entry.get(|| debug_abbrev.abbreviations(offset)), + None => debug_abbrev.abbreviations(offset).map(Arc::new), } - self.abbreviations - .get(|| debug_abbrev.abbreviations(offset)) } } diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index afc154e7..348e6b8f 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -9,13 +9,13 @@ use crate::common::{ }; use crate::constants; use crate::read::{ - Abbreviations, AbbreviationsCache, AttributeValue, DebugAbbrev, DebugAddr, DebugAranges, - DebugCuIndex, DebugInfo, DebugInfoUnitHeadersIter, DebugLine, DebugLineStr, DebugLoc, - DebugLocLists, DebugRngLists, DebugStr, DebugStrOffsets, DebugTuIndex, DebugTypes, - DebugTypesUnitHeadersIter, DebuggingInformationEntry, EntriesCursor, EntriesRaw, EntriesTree, - Error, IncompleteLineProgram, LocListIter, LocationLists, Range, RangeLists, RawLocListIter, - RawRngListIter, Reader, ReaderOffset, ReaderOffsetId, Result, RngListIter, Section, UnitHeader, - UnitIndex, UnitIndexSectionIterator, UnitOffset, UnitType, + Abbreviations, AbbreviationsCache, AbbreviationsCacheStrategy, AttributeValue, DebugAbbrev, + DebugAddr, DebugAranges, DebugCuIndex, DebugInfo, DebugInfoUnitHeadersIter, DebugLine, + DebugLineStr, DebugLoc, DebugLocLists, DebugRngLists, DebugStr, DebugStrOffsets, DebugTuIndex, + DebugTypes, DebugTypesUnitHeadersIter, DebuggingInformationEntry, EntriesCursor, EntriesRaw, + EntriesTree, Error, IncompleteLineProgram, LocListIter, LocationLists, Range, RangeLists, + RawLocListIter, RawRngListIter, Reader, ReaderOffset, ReaderOffsetId, Result, RngListIter, + Section, UnitHeader, UnitIndex, UnitIndexSectionIterator, UnitOffset, UnitType, }; /// All of the commonly used DWARF sections, and other common information. @@ -172,6 +172,12 @@ impl Dwarf { } impl Dwarf { + /// Set the strategy to use for the abbreviations cache. + pub fn set_abbreviations_cache_strategy(&mut self, strategy: AbbreviationsCacheStrategy) { + self.abbreviations_cache + .set_strategy(strategy, self.debug_info.units()) + } + /// Iterate the unit headers in the `.debug_info` section. /// /// Can be [used with From 77dd7a279f7a7b5b99224b1a6eb6f9f06a402b0e Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Wed, 13 Sep 2023 12:57:04 +1000 Subject: [PATCH 2/2] Change Dwarf::set_abbreviations_cache_strategy to populate_abbreviations_cache --- crates/examples/src/bin/dwarfdump.rs | 2 +- src/read/abbrev.rs | 137 ++++++++------------------- src/read/cfi.rs | 2 +- src/read/dwarf.rs | 15 ++- src/read/lazy.rs | 116 ----------------------- src/read/mod.rs | 3 - 6 files changed, 54 insertions(+), 221 deletions(-) delete mode 100644 src/read/lazy.rs diff --git a/crates/examples/src/bin/dwarfdump.rs b/crates/examples/src/bin/dwarfdump.rs index eaf5039e..8e7cadfa 100644 --- a/crates/examples/src/bin/dwarfdump.rs +++ b/crates/examples/src/bin/dwarfdump.rs @@ -695,7 +695,7 @@ where dwarf.load_sup(&mut load_sup_section)?; } - dwarf.set_abbreviations_cache_strategy(gimli::AbbreviationsCacheStrategy::All); + dwarf.populate_abbreviations_cache(gimli::AbbreviationsCacheStrategy::All); if flags.eh_frame { let eh_frame = gimli::EhFrame::load(&mut load_section).unwrap(); diff --git a/src/read/abbrev.rs b/src/read/abbrev.rs index dd83c8d4..1162583b 100644 --- a/src/read/abbrev.rs +++ b/src/read/abbrev.rs @@ -11,7 +11,6 @@ use core::ops::Deref; use crate::common::{DebugAbbrevOffset, Encoding, SectionId}; use crate::constants; use crate::endianity::Endianity; -use crate::read::lazy::LazyArc; use crate::read::{ DebugInfoUnitHeadersIter, EndianSlice, Error, Reader, ReaderOffset, Result, Section, UnitHeader, }; @@ -108,32 +107,20 @@ impl From for DebugAbbrev { #[derive(Clone, Copy, Debug, PartialEq, Eq)] #[non_exhaustive] pub enum AbbreviationsCacheStrategy { - /// Don't cache anything. - None, - /// Cache the abbreviations at offset 0. - Zero, /// Cache abbreviations that are used more than once. + /// + /// This is useful if the units in the `.debug_info` section will be parsed only once. Duplicates, /// Cache all abbreviations. + /// + /// This is useful if the units in the `.debug_info` section will be parsed more than once. All, } /// A cache of previously parsed `Abbreviations`. -/// -/// This defaults to only caching the abbreviations for offset 0, -/// since this is a common case in which abbreviations are reused, -/// and doesn't require additional parsing. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct AbbreviationsCache { - abbreviations: btree_map::BTreeMap>, -} - -impl Default for AbbreviationsCache { - fn default() -> Self { - let mut abbreviations = btree_map::BTreeMap::new(); - abbreviations.insert(0, LazyArc::default()); - AbbreviationsCache { abbreviations } - } + abbreviations: btree_map::BTreeMap>>, } impl AbbreviationsCache { @@ -142,30 +129,31 @@ impl AbbreviationsCache { Self::default() } - /// Set the strategy for the abbreviations cache. + /// Parse abbreviations and store them in the cache. + /// + /// This will iterate over the given units to determine the abbreviations + /// offsets. Any existing cache entries are discarded. /// - /// This discards any existing cache entries. - pub fn set_strategy( + /// Errors during parsing abbreviations are also stored in the cache. + /// Errors during iterating over the units are ignored. + pub fn populate( &mut self, strategy: AbbreviationsCacheStrategy, + debug_abbrev: &DebugAbbrev, mut units: DebugInfoUnitHeadersIter, ) { let mut offsets = Vec::new(); match strategy { - AbbreviationsCacheStrategy::None => {} - AbbreviationsCacheStrategy::Zero => { - offsets.push(0); - } AbbreviationsCacheStrategy::Duplicates => { while let Ok(Some(unit)) = units.next() { - offsets.push(unit.debug_abbrev_offset().0.into_u64()); + offsets.push(unit.debug_abbrev_offset()); } - offsets.sort_unstable(); - let mut prev_offset = 0; + offsets.sort_unstable_by_key(|offset| offset.0); + let mut prev_offset = R::Offset::from_u8(0); let mut count = 0; offsets.retain(|offset| { - if count == 0 || prev_offset != *offset { - prev_offset = *offset; + if count == 0 || prev_offset != offset.0 { + prev_offset = offset.0; count = 1; } else { count += 1; @@ -175,28 +163,45 @@ impl AbbreviationsCache { } AbbreviationsCacheStrategy::All => { while let Ok(Some(unit)) = units.next() { - offsets.push(unit.debug_abbrev_offset().0.into_u64()); + offsets.push(unit.debug_abbrev_offset()); } - offsets.sort_unstable(); + offsets.sort_unstable_by_key(|offset| offset.0); offsets.dedup(); } } self.abbreviations = offsets .into_iter() - .map(|offset| (offset, LazyArc::default())) + .map(|offset| { + ( + offset.0.into_u64(), + debug_abbrev.abbreviations(offset).map(Arc::new), + ) + }) .collect(); } + /// Set an entry in the abbreviations cache. + /// + /// This is only required if you want to manually populate the cache. + pub fn set( + &mut self, + offset: DebugAbbrevOffset, + abbreviations: Arc, + ) { + self.abbreviations + .insert(offset.0.into_u64(), Ok(abbreviations)); + } + /// Parse the abbreviations at the given offset. /// - /// This uses or updates the cache as required. + /// This uses the cache if possible, but does not update it. pub fn get( &self, debug_abbrev: &DebugAbbrev, offset: DebugAbbrevOffset, ) -> Result> { match self.abbreviations.get(&offset.0.into_u64()) { - Some(entry) => entry.get(|| debug_abbrev.abbreviations(offset)), + Some(entry) => entry.clone(), None => debug_abbrev.abbreviations(offset).map(Arc::new), } } @@ -1094,64 +1099,4 @@ pub mod tests { .unwrap(); assert!(abbrevs.get(0).is_none()); } - - #[test] - fn abbreviations_cache() { - #[rustfmt::skip] - let buf = Section::new() - .abbrev(1, constants::DW_TAG_subprogram, constants::DW_CHILDREN_no) - .abbrev_attr(constants::DW_AT_name, constants::DW_FORM_string) - .abbrev_attr_null() - .abbrev_null() - .abbrev(1, constants::DW_TAG_compile_unit, constants::DW_CHILDREN_yes) - .abbrev_attr(constants::DW_AT_producer, constants::DW_FORM_strp) - .abbrev_attr(constants::DW_AT_language, constants::DW_FORM_data2) - .abbrev_attr_null() - .abbrev_null() - .get_contents() - .unwrap(); - - let abbrev1 = Abbreviation::new( - 1, - constants::DW_TAG_subprogram, - constants::DW_CHILDREN_no, - vec![AttributeSpecification::new( - constants::DW_AT_name, - constants::DW_FORM_string, - None, - )] - .into(), - ); - - let abbrev2 = Abbreviation::new( - 1, - constants::DW_TAG_compile_unit, - constants::DW_CHILDREN_yes, - vec![ - AttributeSpecification::new( - constants::DW_AT_producer, - constants::DW_FORM_strp, - None, - ), - AttributeSpecification::new( - constants::DW_AT_language, - constants::DW_FORM_data2, - None, - ), - ] - .into(), - ); - - let debug_abbrev = DebugAbbrev::new(&buf, LittleEndian); - let cache = AbbreviationsCache::new(); - let abbrevs1 = cache.get(&debug_abbrev, DebugAbbrevOffset(0)).unwrap(); - assert_eq!(abbrevs1.get(1), Some(&abbrev1)); - let abbrevs2 = cache.get(&debug_abbrev, DebugAbbrevOffset(8)).unwrap(); - assert_eq!(abbrevs2.get(1), Some(&abbrev2)); - let abbrevs3 = cache.get(&debug_abbrev, DebugAbbrevOffset(0)).unwrap(); - assert_eq!(abbrevs3.get(1), Some(&abbrev1)); - - assert!(!Arc::ptr_eq(&abbrevs1, &abbrevs2)); - assert!(Arc::ptr_eq(&abbrevs1, &abbrevs3)); - } } diff --git a/src/read/cfi.rs b/src/read/cfi.rs index 6ac019f1..25abc1cb 100644 --- a/src/read/cfi.rs +++ b/src/read/cfi.rs @@ -3142,7 +3142,7 @@ pub enum CallFrameInstruction { /// > /// > AArch64 Extension /// > - /// > The DW_CFA_AARCH64_negate_ra_state operation negates bit[0] of the + /// > The DW_CFA_AARCH64_negate_ra_state operation negates bit 0 of the /// > RA_SIGN_STATE pseudo-register. It does not take any operands. The /// > DW_CFA_AARCH64_negate_ra_state must not be mixed with other DWARF Register /// > Rule Instructions on the RA_SIGN_STATE pseudo-register in one Common diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index 348e6b8f..2f8dcb36 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -172,10 +172,16 @@ impl Dwarf { } impl Dwarf { - /// Set the strategy to use for the abbreviations cache. - pub fn set_abbreviations_cache_strategy(&mut self, strategy: AbbreviationsCacheStrategy) { + /// Parse abbreviations and store them in the cache. + /// + /// This will iterate over the units in `self.debug_info` to determine the + /// abbreviations offsets. + /// + /// Errors during parsing abbreviations are also stored in the cache. + /// Errors during iterating over the units are ignored. + pub fn populate_abbreviations_cache(&mut self, strategy: AbbreviationsCacheStrategy) { self.abbreviations_cache - .set_strategy(strategy, self.debug_info.units()) + .populate(strategy, &self.debug_abbrev, self.debug_info.units()); } /// Iterate the unit headers in the `.debug_info` section. @@ -868,7 +874,8 @@ impl Unit { /// Construct a new `Unit` from the given unit header and abbreviations. /// /// The abbreviations for this call can be obtained using `dwarf.abbreviations(&header)`. - /// The caller may implement caching to reuse the Abbreviations across units with the same header.debug_abbrev_offset() value. + /// The caller may implement caching to reuse the `Abbreviations` across units with the + /// same `header.debug_abbrev_offset()` value. #[inline] pub fn new_with_abbreviations( dwarf: &Dwarf, diff --git a/src/read/lazy.rs b/src/read/lazy.rs deleted file mode 100644 index 6138735c..00000000 --- a/src/read/lazy.rs +++ /dev/null @@ -1,116 +0,0 @@ -pub(crate) use imp::*; - -#[cfg(not(feature = "std"))] -mod imp { - use alloc::sync::Arc; - use core::sync::atomic::{AtomicPtr, Ordering}; - use core::{mem, ptr}; - - #[derive(Debug, Default)] - pub(crate) struct LazyArc { - // Only written once with a value obtained from `Arc::into_raw`. - // This holds a ref count for the `Arc`, so it is always safe to - // clone the `Arc` given a reference to the `LazyArc`. - value: AtomicPtr, - } - - impl Drop for LazyArc { - fn drop(&mut self) { - let value_ptr = self.value.load(Ordering::Acquire); - if !value_ptr.is_null() { - // SAFETY: all writes to `self.value` are pointers obtained from `Arc::into_raw`. - drop(unsafe { Arc::from_raw(value_ptr) }); - } - } - } - - impl LazyArc { - pub(crate) fn get Result>(&self, f: F) -> Result, E> { - // Clone an `Arc` given a pointer obtained from `Arc::into_raw`. - // SAFETY: `value_ptr` must be a valid pointer obtained from `Arc::into_raw`. - unsafe fn clone_arc_ptr(value_ptr: *const T) -> Arc { - let value = Arc::from_raw(value_ptr); - let clone = Arc::clone(&value); - mem::forget(value); - clone - } - - // Return the existing value if already computed. - // `Ordering::Acquire` is needed so that the content of the loaded `Arc` is - // visible to this thread. - let value_ptr = self.value.load(Ordering::Acquire); - if !value_ptr.is_null() { - // SAFETY: all writes to `self.value` are pointers obtained from `Arc::into_raw`. - return Ok(unsafe { clone_arc_ptr(value_ptr) }); - } - - // Race to compute and set the value. - let value = f().map(Arc::new)?; - let value_ptr = Arc::into_raw(value); - match self.value.compare_exchange( - ptr::null_mut(), - value_ptr as *mut T, - // Success: `Ordering::Release` is needed so that the content of the stored `Arc` - // is visible to other threads. No ordering is required for the null ptr that is - // loaded, but older rust versions (< 1.64) require that its ordering must not - // be weaker than the failure ordering, so we use `Ordering::AcqRel`. - Ordering::AcqRel, - // Failure: `Ordering::Acquire` is needed so that the content of the loaded `Arc` - // is visible to this thread. - Ordering::Acquire, - ) { - Ok(_) => { - // Return the value we computed. - // SAFETY: `value_ptr` was obtained from `Arc::into_raw`. - Ok(unsafe { clone_arc_ptr(value_ptr) }) - } - Err(existing_value_ptr) => { - // We lost the race, drop unneeded `value_ptr`. - // SAFETY: `value_ptr` was obtained from `Arc::into_raw`. - drop(unsafe { Arc::from_raw(value_ptr) }); - // Return the existing value. - // SAFETY: all writes to `self.value` are pointers obtained from `Arc::into_raw`. - Ok(unsafe { clone_arc_ptr(existing_value_ptr) }) - } - } - } - } -} - -#[cfg(feature = "std")] -mod imp { - use std::sync::{Arc, Mutex}; - - #[derive(Debug, Default)] - pub(crate) struct LazyArc { - value: Mutex>>, - } - - impl LazyArc { - pub(crate) fn get Result>(&self, f: F) -> Result, E> { - let mut lock = self.value.lock().unwrap(); - if let Some(value) = &*lock { - return Ok(value.clone()); - } - let value = f().map(Arc::new)?; - *lock = Some(value.clone()); - Ok(value) - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn lazy_arc() { - let lazy = LazyArc::default(); - let value = lazy.get(|| Err(())); - assert_eq!(value, Err(())); - let value = lazy.get(|| Ok::(3)).unwrap(); - assert_eq!(*value, 3); - let value = lazy.get(|| Err(())).unwrap(); - assert_eq!(*value, 3); - } -} diff --git a/src/read/mod.rs b/src/read/mod.rs index 22a8e9fb..2ad7f6ae 100644 --- a/src/read/mod.rs +++ b/src/read/mod.rs @@ -213,9 +213,6 @@ pub use self::aranges::*; mod index; pub use self::index::*; -#[cfg(feature = "read")] -mod lazy; - #[cfg(feature = "read")] mod line; #[cfg(feature = "read")]