From fe2670a47dd8ed39a52caae38aab2161e0ae5679 Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Tue, 17 Dec 2019 14:44:07 +0100 Subject: [PATCH] Initial support for recording query keys in self-profiling data. --- Cargo.lock | 4 +- src/librustc/Cargo.toml | 2 +- src/librustc/dep_graph/graph.rs | 2 +- src/librustc/ty/query/mod.rs | 3 + src/librustc/ty/query/plumbing.rs | 65 +++-- src/librustc/ty/query/profiling_support.rs | 276 +++++++++++++++++++++ src/librustc_codegen_ssa/base.rs | 6 +- src/librustc_data_structures/Cargo.toml | 2 +- src/librustc_data_structures/profiling.rs | 42 +++- 9 files changed, 350 insertions(+), 52 deletions(-) create mode 100644 src/librustc/ty/query/profiling_support.rs diff --git a/Cargo.lock b/Cargo.lock index 51ee05a98e26a..ffcc763e7a0a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2021,9 +2021,9 @@ dependencies = [ [[package]] name = "measureme" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36dcc09c1a633097649f7d48bde3d8a61d2a43c01ce75525e31fbbc82c0fccf4" +checksum = "ebefdcb02b2ddeee50178a218aeaf6d752d0777cd07914542f202cb7440e6e38" dependencies = [ "byteorder", "memmap", diff --git a/src/librustc/Cargo.toml b/src/librustc/Cargo.toml index e6afe723f0fc5..86f6ef135db67 100644 --- a/src/librustc/Cargo.toml +++ b/src/librustc/Cargo.toml @@ -37,6 +37,6 @@ byteorder = { version = "1.3" } chalk-engine = { version = "0.9.0", default-features=false } rustc_fs_util = { path = "../librustc_fs_util" } smallvec = { version = "1.0", features = ["union", "may_dangle"] } -measureme = "0.6.0" +measureme = "0.7.0" rustc_error_codes = { path = "../librustc_error_codes" } rustc_session = { path = "../librustc_session" } diff --git a/src/librustc/dep_graph/graph.rs b/src/librustc/dep_graph/graph.rs index 898222e269621..a263dd4a6f982 100644 --- a/src/librustc/dep_graph/graph.rs +++ b/src/librustc/dep_graph/graph.rs @@ -29,7 +29,7 @@ pub struct DepGraph { /// This field is used for assigning DepNodeIndices when running in /// non-incremental mode. Even in non-incremental mode we make sure that - /// each task as a `DepNodeIndex` that uniquely identifies it. This unique + /// each task has a `DepNodeIndex` that uniquely identifies it. This unique /// ID is used for self-profiling. virtual_dep_node_index: Lrc, } diff --git a/src/librustc/ty/query/mod.rs b/src/librustc/ty/query/mod.rs index 5b4a6ac8a2d3c..b735dde1de676 100644 --- a/src/librustc/ty/query/mod.rs +++ b/src/librustc/ty/query/mod.rs @@ -82,6 +82,9 @@ use self::config::QueryAccessors; mod on_disk_cache; pub use self::on_disk_cache::OnDiskCache; +mod profiling_support; +pub use self::profiling_support::{IntoSelfProfilingString, QueryKeyStringBuilder}; + // Each of these queries corresponds to a function pointer field in the // `Providers` struct for requesting a value of that type, and a method // on `tcx: TyCtxt` (and `tcx.at(span)`) for doing that request in a way diff --git a/src/librustc/ty/query/plumbing.rs b/src/librustc/ty/query/plumbing.rs index 5e385c009a01a..6ed5b1e18252b 100644 --- a/src/librustc/ty/query/plumbing.rs +++ b/src/librustc/ty/query/plumbing.rs @@ -718,42 +718,6 @@ macro_rules! define_queries_inner { } } - /// All self-profiling events generated by the query engine use a - /// virtual `StringId`s for their `event_id`. This method makes all - /// those virtual `StringId`s point to actual strings. - /// - /// If we are recording only summary data, the ids will point to - /// just the query names. If we are recording query keys too, we - /// allocate the corresponding strings here. (The latter is not yet - /// implemented.) - pub fn allocate_self_profile_query_strings( - &self, - profiler: &rustc_data_structures::profiling::SelfProfiler - ) { - // Walk the entire query cache and allocate the appropriate - // string representation. Each cache entry is uniquely - // identified by its dep_node_index. - $({ - let query_name_string_id = - profiler.get_or_alloc_cached_string(stringify!($name)); - - let result_cache = self.$name.lock_shards(); - - for shard in result_cache.iter() { - let query_invocation_ids = shard - .results - .values() - .map(|v| v.index) - .map(|dep_node_index| dep_node_index.into()); - - profiler.bulk_map_query_invocation_id_to_single_string( - query_invocation_ids, - query_name_string_id - ); - } - })* - } - #[cfg(parallel_compiler)] pub fn collect_active_jobs(&self) -> Vec>> { let mut jobs = Vec::new(); @@ -1057,6 +1021,35 @@ macro_rules! define_queries_inner { pub fn $name(self, key: $K) -> $V { self.at(DUMMY_SP).$name(key) })* + + /// All self-profiling events generated by the query engine use + /// virtual `StringId`s for their `event_id`. This method makes all + /// those virtual `StringId`s point to actual strings. + /// + /// If we are recording only summary data, the ids will point to + /// just the query names. If we are recording query keys too, we + /// allocate the corresponding strings here. + pub fn alloc_self_profile_query_strings(self) { + use crate::ty::query::profiling_support::{ + alloc_self_profile_query_strings_for_query_cache, + QueryKeyStringCache, + }; + + if !self.prof.enabled() { + return; + } + + let mut string_cache = QueryKeyStringCache::new(); + + $({ + alloc_self_profile_query_strings_for_query_cache( + self, + stringify!($name), + &self.queries.$name, + &mut string_cache, + ); + })* + } } impl TyCtxtAt<$tcx> { diff --git a/src/librustc/ty/query/profiling_support.rs b/src/librustc/ty/query/profiling_support.rs new file mode 100644 index 0000000000000..ef30bd63be01c --- /dev/null +++ b/src/librustc/ty/query/profiling_support.rs @@ -0,0 +1,276 @@ + +use crate::hir::def_id::{CRATE_DEF_INDEX, CrateNum, DefId, DefIndex, LOCAL_CRATE}; +use crate::hir::map::definitions::DefPathData; +use crate::ty::context::TyCtxt; +use crate::ty::query::config::QueryConfig; +use crate::ty::query::plumbing::QueryCache; +use measureme::{StringId, StringComponent}; +use rustc_data_structures::fx::FxHashMap; +use rustc_data_structures::profiling::SelfProfiler; +use rustc_data_structures::sharded::Sharded; +use std::fmt::Debug; +use std::io::Write; + +pub struct QueryKeyStringCache { + def_id_cache: FxHashMap, +} + +impl QueryKeyStringCache { + pub fn new() -> QueryKeyStringCache { + QueryKeyStringCache { + def_id_cache: Default::default(), + } + } +} + +pub struct QueryKeyStringBuilder<'p, 'c, 'tcx> { + profiler: &'p SelfProfiler, + tcx: TyCtxt<'tcx>, + string_cache: &'c mut QueryKeyStringCache, +} + +impl<'p, 'c, 'tcx> QueryKeyStringBuilder<'p, 'c, 'tcx> { + + pub fn new( + profiler: &'p SelfProfiler, + tcx: TyCtxt<'tcx>, + string_cache: &'c mut QueryKeyStringCache, + ) -> QueryKeyStringBuilder<'p, 'c, 'tcx> { + QueryKeyStringBuilder { + profiler, + tcx, + string_cache, + } + } + + // The current implementation is rather crude. In the future it might be a + // good idea to base this on `ty::print` in order to get nicer and more + // efficient query keys. + fn def_id_to_string_id(&mut self, def_id: DefId) -> StringId { + + if let Some(&string_id) = self.string_cache.def_id_cache.get(&def_id) { + return string_id; + } + + let def_key = self.tcx.def_key(def_id); + + let (parent_string_id, start_index) = match def_key.parent { + Some(parent_index) => { + let parent_def_id = DefId { + index: parent_index, + krate: def_id.krate, + }; + + (self.def_id_to_string_id(parent_def_id), 0) + } + None => { + (StringId::INVALID, 2) + } + }; + + let dis_buffer = &mut [0u8; 16]; + let name; + let dis; + let end_index; + + match def_key.disambiguated_data.data { + DefPathData::CrateRoot => { + name = self.tcx.original_crate_name(def_id.krate).as_str(); + dis = ""; + end_index = 3; + } + other => { + name = other.as_symbol().as_str(); + if def_key.disambiguated_data.disambiguator == 0 { + dis = ""; + end_index = 3; + } else { + write!(&mut dis_buffer[..], + "[{}]", + def_key.disambiguated_data.disambiguator + ).unwrap(); + let end_of_dis = dis_buffer.iter().position(|&c| c == b']').unwrap(); + dis = std::str::from_utf8(&dis_buffer[.. end_of_dis + 1]).unwrap(); + end_index = 4; + } + } + } + + let components = [ + StringComponent::Ref(parent_string_id), + StringComponent::Value("::"), + StringComponent::Value(&name[..]), + StringComponent::Value(dis) + ]; + + let string_id = self.profiler.alloc_string( + &components[start_index .. end_index] + ); + + self.string_cache.def_id_cache.insert(def_id, string_id); + + string_id + } +} + +pub trait IntoSelfProfilingString { + fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId; +} + +// The default implementation of `IntoSelfProfilingString` just uses `Debug` +// which is slow and causes lots of duplication of string data. +// The specialized impls below take care of making the `DefId` case more +// efficient. +impl IntoSelfProfilingString for T { + + default fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId { + let s = format!("{:?}", self); + builder.profiler.alloc_string(&s[..]) + } +} + +impl IntoSelfProfilingString for DefId { + + fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId { + builder.def_id_to_string_id(*self) + } +} + +impl IntoSelfProfilingString for CrateNum { + + fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId { + builder.def_id_to_string_id(DefId { + krate: *self, + index: CRATE_DEF_INDEX, + }) + } +} + +impl IntoSelfProfilingString for DefIndex { + + fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId { + builder.def_id_to_string_id(DefId { + krate: LOCAL_CRATE, + index: *self, + }) + } +} + +impl IntoSelfProfilingString for (T0, T1) + where T0: IntoSelfProfilingString+Debug, + T1: IntoSelfProfilingString+Debug, +{ + default fn to_self_profile_string( + &self, + builder: &mut QueryKeyStringBuilder<'_, '_, '_> + ) -> StringId { + + let val0 = self.0.to_self_profile_string(builder); + let val1 = self.1.to_self_profile_string(builder); + + let components = &[ + StringComponent::Value("("), + StringComponent::Ref(val0), + StringComponent::Value(","), + StringComponent::Ref(val1), + StringComponent::Value(")"), + ]; + + builder.profiler.alloc_string(components) + } +} + +/// Allocate the self-profiling query strings for a single query cache. This +/// method is called from `alloc_self_profile_query_strings` which knows all +/// the queries via macro magic. +pub(super) fn alloc_self_profile_query_strings_for_query_cache<'tcx, Q>( + tcx: TyCtxt<'tcx>, + query_name: &'static str, + query_cache: &Sharded>, + string_cache: &mut QueryKeyStringCache, +) where Q: QueryConfig<'tcx> { + tcx.prof.with_profiler(|profiler| { + let event_id_builder = profiler.event_id_builder(); + + // Walk the entire query cache and allocate the appropriate + // string representations. Each cache entry is uniquely + // identified by its dep_node_index. + if profiler.query_key_recording_enabled() { + let mut query_string_builder = + QueryKeyStringBuilder::new(profiler, tcx, string_cache); + + let query_name = profiler.get_or_alloc_cached_string(query_name); + + // Since building the string representation of query keys might + // need to invoke queries itself, we cannot keep the query caches + // locked while doing so. Instead we copy out the + // `(query_key, dep_node_index)` pairs and release the lock again. + let query_keys_and_indices = { + let shards = query_cache.lock_shards(); + let len = shards.iter().map(|shard| shard.results.len()).sum(); + + let mut query_keys_and_indices = Vec::with_capacity(len); + + for shard in &shards { + query_keys_and_indices.extend(shard.results.iter().map(|(q_key, q_val)| { + (q_key.clone(), q_val.index) + })); + } + + query_keys_and_indices + }; + + // Now actually allocate the strings. If allocating the strings + // generates new entries in the query cache, we'll miss them but + // we don't actually care. + for (query_key, dep_node_index) in query_keys_and_indices { + // Translate the DepNodeIndex into a QueryInvocationId + let query_invocation_id = dep_node_index.into(); + + // Create the string version of the query-key + let query_key = query_key.to_self_profile_string(&mut query_string_builder); + let event_id = event_id_builder.from_label_and_arg(query_name, query_key); + + // Doing this in bulk might be a good idea: + profiler.map_query_invocation_id_to_string( + query_invocation_id, + event_id.to_string_id(), + ); + } + } else { + // In this branch we don't allocate query keys + let query_name = profiler.get_or_alloc_cached_string(query_name); + let event_id = event_id_builder.from_label(query_name).to_string_id(); + + let shards = query_cache.lock_shards(); + + for shard in shards.iter() { + let query_invocation_ids = shard + .results + .values() + .map(|v| v.index) + .map(|dep_node_index| dep_node_index.into()); + + profiler.bulk_map_query_invocation_id_to_single_string( + query_invocation_ids, + event_id, + ); + } + } + }); +} diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs index 51168d59fdb60..1a7198a1b50cc 100644 --- a/src/librustc_codegen_ssa/base.rs +++ b/src/librustc_codegen_ssa/base.rs @@ -710,11 +710,11 @@ fn finalize_tcx(tcx: TyCtxt<'_>) { // We assume that no queries are run past here. If there are new queries // after this point, they'll show up as "" in self-profiling data. - tcx.prof.with_profiler(|profiler| { + { let _prof_timer = tcx.prof.generic_activity("self_profile_alloc_query_strings"); - tcx.queries.allocate_self_profile_query_strings(profiler); - }); + tcx.alloc_self_profile_query_strings(); + } } impl CrateInfo { diff --git a/src/librustc_data_structures/Cargo.toml b/src/librustc_data_structures/Cargo.toml index 9c42f3633f2c0..d6ca11364ea96 100644 --- a/src/librustc_data_structures/Cargo.toml +++ b/src/librustc_data_structures/Cargo.toml @@ -26,7 +26,7 @@ rustc-hash = "1.0.1" smallvec = { version = "1.0", features = ["union", "may_dangle"] } rustc_index = { path = "../librustc_index", package = "rustc_index" } bitflags = "1.2.1" -measureme = "0.6.0" +measureme = "0.7.0" [dependencies.parking_lot] version = "0.9" diff --git a/src/librustc_data_structures/profiling.rs b/src/librustc_data_structures/profiling.rs index 38aaf25e75fd7..cde7507d3853d 100644 --- a/src/librustc_data_structures/profiling.rs +++ b/src/librustc_data_structures/profiling.rs @@ -39,7 +39,7 @@ //! ## `event_id` Assignment //! //! As far as `measureme` is concerned, `event_id`s are just strings. However, -//! it would incur way too much overhead to generate and persist each `event_id` +//! it would incur too much overhead to generate and persist each `event_id` //! string at the point where the event is recorded. In order to make this more //! efficient `measureme` has two features: //! @@ -56,7 +56,7 @@ //! ID be mapped to an actual string. This is used to cheaply generate //! `event_id`s while the events actually occur, causing little timing //! distortion, and then later map those `StringId`s, in bulk, to actual -//! `event_id` strings. This way the largest part of tracing overhead is +//! `event_id` strings. This way the largest part of the tracing overhead is //! localized to one contiguous chunk of time. //! //! How are these `event_id`s generated in the compiler? For things that occur @@ -91,7 +91,7 @@ use std::sync::Arc; use std::thread::ThreadId; use std::u32; -use measureme::StringId; +use measureme::{EventId, EventIdBuilder, SerializableString, StringId}; use parking_lot::RwLock; /// MmapSerializatioSink is faster on macOS and Linux @@ -122,6 +122,8 @@ bitflags::bitflags! { const QUERY_BLOCKED = 1 << 3; const INCR_CACHE_LOADS = 1 << 4; + const QUERY_KEYS = 1 << 5; + const DEFAULT = Self::GENERIC_ACTIVITIES.bits | Self::QUERY_PROVIDERS.bits | Self::QUERY_BLOCKED.bits | @@ -141,6 +143,7 @@ const EVENT_FILTERS_BY_NAME: &[(&str, EventFilter)] = &[ ("query-cache-hit", EventFilter::QUERY_CACHE_HITS), ("query-blocked" , EventFilter::QUERY_BLOCKED), ("incr-cache-load", EventFilter::INCR_CACHE_LOADS), + ("query-keys", EventFilter::QUERY_KEYS), ]; fn thread_id_to_u32(tid: ThreadId) -> u32 { @@ -210,6 +213,7 @@ impl SelfProfilerRef { pub fn generic_activity(&self, event_id: &'static str) -> TimingGuard<'_> { self.exec(EventFilter::GENERIC_ACTIVITIES, |profiler| { let event_id = profiler.get_or_alloc_cached_string(event_id); + let event_id = EventId::from_label(event_id); TimingGuard::start( profiler, profiler.generic_activity_event_kind, @@ -223,7 +227,7 @@ impl SelfProfilerRef { #[inline(always)] pub fn query_provider(&self) -> TimingGuard<'_> { self.exec(EventFilter::QUERY_PROVIDERS, |profiler| { - TimingGuard::start(profiler, profiler.query_event_kind, StringId::INVALID) + TimingGuard::start(profiler, profiler.query_event_kind, EventId::INVALID) }) } @@ -246,7 +250,7 @@ impl SelfProfilerRef { TimingGuard::start( profiler, profiler.query_blocked_event_kind, - StringId::INVALID, + EventId::INVALID, ) }) } @@ -260,7 +264,7 @@ impl SelfProfilerRef { TimingGuard::start( profiler, profiler.incremental_load_result_event_kind, - StringId::INVALID, + EventId::INVALID, ) }) } @@ -278,7 +282,7 @@ impl SelfProfilerRef { profiler.profiler.record_instant_event( event_kind(profiler), - event_id, + EventId::from_virtual(event_id), thread_id, ); @@ -291,6 +295,10 @@ impl SelfProfilerRef { f(&profiler) } } + + pub fn enabled(&self) -> bool { + self.profiler.is_some() + } } pub struct SelfProfiler { @@ -366,6 +374,15 @@ impl SelfProfiler { }) } + /// Allocates a new string in the profiling data. Does not do any caching + /// or deduplication. + pub fn alloc_string(&self, s: &STR) -> StringId { + self.profiler.alloc_string(s) + } + + /// Gets a `StringId` for the given string. This method makes sure that + /// any strings going through it will only be allocated once in the + /// profiling data. pub fn get_or_alloc_cached_string(&self, s: &'static str) -> StringId { // Only acquire a read-lock first since we assume that the string is // already present in the common case. @@ -402,6 +419,14 @@ impl SelfProfiler { let from = from.map(|qid| StringId::new_virtual(qid.0)); self.profiler.bulk_map_virtual_to_single_concrete_string(from, to); } + + pub fn query_key_recording_enabled(&self) -> bool { + self.event_filter_mask.contains(EventFilter::QUERY_KEYS) + } + + pub fn event_id_builder(&self) -> EventIdBuilder<'_, SerializationSink> { + EventIdBuilder::new(&self.profiler) + } } #[must_use] @@ -412,7 +437,7 @@ impl<'a> TimingGuard<'a> { pub fn start( profiler: &'a SelfProfiler, event_kind: StringId, - event_id: StringId, + event_id: EventId, ) -> TimingGuard<'a> { let thread_id = thread_id_to_u32(std::thread::current().id()); let raw_profiler = &profiler.profiler; @@ -426,6 +451,7 @@ impl<'a> TimingGuard<'a> { pub fn finish_with_query_invocation_id(self, query_invocation_id: QueryInvocationId) { if let Some(guard) = self.0 { let event_id = StringId::new_virtual(query_invocation_id.0); + let event_id = EventId::from_virtual(event_id); guard.finish_with_override_event_id(event_id); } }