From 371883a05acf04be9fb8d3c0766990ba56cd22e3 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Fri, 6 Oct 2023 12:51:48 +1100 Subject: [PATCH 1/6] coverage: Split `FunctionCoverage` into distinct collector/finished phases This gives us a clearly-defined place to run code after the instance's MIR has been traversed by codegen, but before we emit its `__llvm_covfun` record. --- .../src/coverageinfo/map_data.rs | 48 ++++++++++++------- .../src/coverageinfo/mapgen.rs | 5 +- .../src/coverageinfo/mod.rs | 11 +++-- 3 files changed, 40 insertions(+), 24 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index 84319b4ba2d38..302e00b06ed45 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -10,7 +10,7 @@ use rustc_middle::ty::Instance; /// Holds all of the coverage mapping data associated with a function instance, /// collected during traversal of `Coverage` statements in the function's MIR. #[derive(Debug)] -pub struct FunctionCoverage<'tcx> { +pub struct FunctionCoverageCollector<'tcx> { /// Coverage info that was attached to this function by the instrumentor. function_coverage_info: &'tcx FunctionCoverageInfo, is_used: bool, @@ -26,7 +26,7 @@ pub struct FunctionCoverage<'tcx> { expressions_seen: BitSet, } -impl<'tcx> FunctionCoverage<'tcx> { +impl<'tcx> FunctionCoverageCollector<'tcx> { /// Creates a new set of coverage data for a used (called) function. pub fn new( instance: Instance<'tcx>, @@ -76,11 +76,6 @@ impl<'tcx> FunctionCoverage<'tcx> { } } - /// Returns true for a used (called) function, and false for an unused function. - pub fn is_used(&self) -> bool { - self.is_used - } - /// Marks a counter ID as having been seen in a counter-increment statement. #[instrument(level = "debug", skip(self))] pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) { @@ -165,6 +160,28 @@ impl<'tcx> FunctionCoverage<'tcx> { ZeroExpressions(zero_expressions) } + pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> { + let zero_expressions = self.identify_zero_expressions(); + let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self; + + FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions } + } +} + +pub(crate) struct FunctionCoverage<'tcx> { + function_coverage_info: &'tcx FunctionCoverageInfo, + is_used: bool, + + counters_seen: BitSet, + zero_expressions: ZeroExpressions, +} + +impl<'tcx> FunctionCoverage<'tcx> { + /// Returns true for a used (called) function, and false for an unused function. + pub(crate) fn is_used(&self) -> bool { + self.is_used + } + /// Return the source hash, generated from the HIR node structure, and used to indicate whether /// or not the source code structure changed between different compilations. pub fn source_hash(&self) -> u64 { @@ -177,29 +194,27 @@ impl<'tcx> FunctionCoverage<'tcx> { pub fn get_expressions_and_counter_regions( &self, ) -> (Vec, impl Iterator) { - let zero_expressions = self.identify_zero_expressions(); - - let counter_expressions = self.counter_expressions(&zero_expressions); + let counter_expressions = self.counter_expressions(); // Expression IDs are indices into `self.expressions`, and on the LLVM // side they will be treated as indices into `counter_expressions`, so // the two vectors should correspond 1:1. assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len()); - let counter_regions = self.counter_regions(zero_expressions); + let counter_regions = self.counter_regions(); (counter_expressions, counter_regions) } /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. - fn counter_expressions(&self, zero_expressions: &ZeroExpressions) -> Vec { + fn counter_expressions(&self) -> Vec { // We know that LLVM will optimize out any unused expressions before // producing the final coverage map, so there's no need to do the same // thing on the Rust side unless we're confident we can do much better. // (See `CounterExpressionsMinimizer` in `CoverageMappingWriter.cpp`.) let counter_from_operand = |operand: CovTerm| match operand { - CovTerm::Expression(id) if zero_expressions.contains(id) => Counter::ZERO, + CovTerm::Expression(id) if self.zero_expressions.contains(id) => Counter::ZERO, _ => Counter::from_term(operand), }; @@ -219,10 +234,7 @@ impl<'tcx> FunctionCoverage<'tcx> { /// Converts this function's coverage mappings into an intermediate form /// that will be used by `mapgen` when preparing for FFI. - fn counter_regions( - &self, - zero_expressions: ZeroExpressions, - ) -> impl Iterator { + fn counter_regions(&self) -> impl Iterator { // Historically, mappings were stored directly in counter/expression // statements in MIR, and MIR optimizations would sometimes remove them. // That's mostly no longer true, so now we detect cases where that would @@ -230,7 +242,7 @@ impl<'tcx> FunctionCoverage<'tcx> { let counter_for_term = move |term: CovTerm| { let force_to_zero = match term { CovTerm::Counter(id) => !self.counters_seen.contains(id), - CovTerm::Expression(id) => zero_expressions.contains(id), + CovTerm::Expression(id) => self.zero_expressions.contains(id), CovTerm::Zero => false, }; if force_to_zero { Counter::ZERO } else { Counter::from_term(term) } diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 2f825b801acd2..5b2dae2c0a445 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -1,7 +1,7 @@ use crate::common::CodegenCx; use crate::coverageinfo; use crate::coverageinfo::ffi::CounterMappingRegion; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; use crate::llvm; use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods}; @@ -62,6 +62,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { // Encode coverage mappings and generate function records let mut function_data = Vec::new(); for (instance, function_coverage) in function_coverage_map { + let function_coverage = function_coverage.into_finished(); debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); let mangled_function_name = tcx.symbol_name(instance).name; @@ -419,7 +420,7 @@ fn add_unused_function_coverage<'tcx>( ) { // An unused function's mappings will automatically be rewritten to map to // zero, because none of its counters/expressions are marked as seen. - let function_coverage = FunctionCoverage::unused(instance, function_coverage_info); + let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info); if let Some(coverage_context) = cx.coverage_context() { coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage); diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs index cf7c7e6be60fb..7d69756181a94 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs @@ -3,7 +3,7 @@ use crate::llvm; use crate::builder::Builder; use crate::common::CodegenCx; use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion}; -use crate::coverageinfo::map_data::FunctionCoverage; +use crate::coverageinfo::map_data::FunctionCoverageCollector; use libc::c_uint; use rustc_codegen_ssa::traits::{ @@ -29,7 +29,8 @@ const VAR_ALIGN_BYTES: usize = 8; /// A context object for maintaining all state needed by the coverageinfo module. pub struct CrateCoverageContext<'ll, 'tcx> { /// Coverage data for each instrumented function identified by DefId. - pub(crate) function_coverage_map: RefCell, FunctionCoverage<'tcx>>>, + pub(crate) function_coverage_map: + RefCell, FunctionCoverageCollector<'tcx>>>, pub(crate) pgo_func_name_var_map: RefCell, &'ll llvm::Value>>, } @@ -41,7 +42,9 @@ impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> { } } - pub fn take_function_coverage_map(&self) -> FxHashMap, FunctionCoverage<'tcx>> { + pub fn take_function_coverage_map( + &self, + ) -> FxHashMap, FunctionCoverageCollector<'tcx>> { self.function_coverage_map.replace(FxHashMap::default()) } } @@ -93,7 +96,7 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { let mut coverage_map = coverage_context.function_coverage_map.borrow_mut(); let func_coverage = coverage_map .entry(instance) - .or_insert_with(|| FunctionCoverage::new(instance, function_coverage_info)); + .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info)); let Coverage { kind } = coverage; match *kind { From 86b55cccffa6f8c49747335a43abb04376d1e06f Mon Sep 17 00:00:00 2001 From: Zalathar Date: Fri, 6 Oct 2023 22:46:04 +1100 Subject: [PATCH 2/6] coverage: Fetch expressions and mappings separately The combined `get_expressions_and_counter_regions` method was an artifact of having to prepare the expressions and mappings at the same time, to avoid ownership/lifetime problems with temporary data used by both. Now that we have an explicit transition from `FunctionCoverageCollector` to the final `FunctionCoverage`, we can prepare any shared data during that step and store it in the final struct. --- .../src/coverageinfo/map_data.rs | 36 ++++++------------- .../src/coverageinfo/mapgen.rs | 6 ++-- 2 files changed, 14 insertions(+), 28 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index 302e00b06ed45..2914f310f03bd 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -1,5 +1,6 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind}; +use rustc_data_structures::captures::Captures; use rustc_data_structures::fx::FxIndexSet; use rustc_index::bit_set::BitSet; use rustc_middle::mir::coverage::{ @@ -188,26 +189,11 @@ impl<'tcx> FunctionCoverage<'tcx> { if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } } - /// Generate an array of CounterExpressions, and an iterator over all `Counter`s and their - /// associated `Regions` (from which the LLVM-specific `CoverageMapGenerator` will create - /// `CounterMappingRegion`s. - pub fn get_expressions_and_counter_regions( - &self, - ) -> (Vec, impl Iterator) { - let counter_expressions = self.counter_expressions(); - // Expression IDs are indices into `self.expressions`, and on the LLVM - // side they will be treated as indices into `counter_expressions`, so - // the two vectors should correspond 1:1. - assert_eq!(self.function_coverage_info.expressions.len(), counter_expressions.len()); - - let counter_regions = self.counter_regions(); - - (counter_expressions, counter_regions) - } - /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. - fn counter_expressions(&self) -> Vec { + pub(crate) fn counter_expressions( + &self, + ) -> impl Iterator + ExactSizeIterator + Captures<'_> { // We know that LLVM will optimize out any unused expressions before // producing the final coverage map, so there's no need to do the same // thing on the Rust side unless we're confident we can do much better. @@ -218,23 +204,23 @@ impl<'tcx> FunctionCoverage<'tcx> { _ => Counter::from_term(operand), }; - self.function_coverage_info - .expressions - .iter() - .map(|&Expression { lhs, op, rhs }| CounterExpression { + self.function_coverage_info.expressions.iter().map(move |&Expression { lhs, op, rhs }| { + CounterExpression { lhs: counter_from_operand(lhs), kind: match op { Op::Add => ExprKind::Add, Op::Subtract => ExprKind::Subtract, }, rhs: counter_from_operand(rhs), - }) - .collect::>() + } + }) } /// Converts this function's coverage mappings into an intermediate form /// that will be used by `mapgen` when preparing for FFI. - fn counter_regions(&self) -> impl Iterator { + pub(crate) fn counter_regions( + &self, + ) -> impl Iterator + ExactSizeIterator { // Historically, mappings were stored directly in counter/expression // statements in MIR, and MIR optimizations would sometimes remove them. // That's mostly no longer true, so now we detect cases where that would diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 5b2dae2c0a445..965be3f05eea7 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -185,13 +185,13 @@ fn encode_mappings_for_function( global_file_table: &mut GlobalFileTable, function_coverage: &FunctionCoverage<'_>, ) -> Vec { - let (expressions, counter_regions) = function_coverage.get_expressions_and_counter_regions(); - - let mut counter_regions = counter_regions.collect::>(); + let mut counter_regions = function_coverage.counter_regions().collect::>(); if counter_regions.is_empty() { return Vec::new(); } + let expressions = function_coverage.counter_expressions().collect::>(); + let mut virtual_file_mapping = IndexVec::::new(); let mut mapping_regions = Vec::with_capacity(counter_regions.len()); From e985ae5a459e7bc4ac68a926f2560621aea30a6c Mon Sep 17 00:00:00 2001 From: Zalathar Date: Tue, 3 Oct 2023 21:40:50 +1100 Subject: [PATCH 3/6] coverage: Build the global file table ahead of time --- Cargo.lock | 1 + compiler/rustc_codegen_llvm/Cargo.toml | 1 + .../src/coverageinfo/map_data.rs | 6 ++ .../src/coverageinfo/mapgen.rs | 68 ++++++++++++------- 4 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7aa243ad8b526..7a80f17214566 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3596,6 +3596,7 @@ version = "0.0.0" dependencies = [ "bitflags 1.3.2", "cstr", + "itertools", "libc", "measureme", "object", diff --git a/compiler/rustc_codegen_llvm/Cargo.toml b/compiler/rustc_codegen_llvm/Cargo.toml index be09820d08da2..e864337e5dcb7 100644 --- a/compiler/rustc_codegen_llvm/Cargo.toml +++ b/compiler/rustc_codegen_llvm/Cargo.toml @@ -9,6 +9,7 @@ test = false [dependencies] bitflags = "1.0" cstr = "0.2" +itertools = "0.10.5" libc = "0.2" measureme = "10.0.0" object = { version = "0.32.0", default-features = false, features = [ diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs index 2914f310f03bd..93a8a4b1d5e81 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs @@ -7,6 +7,7 @@ use rustc_middle::mir::coverage::{ CodeRegion, CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, Op, }; use rustc_middle::ty::Instance; +use rustc_span::Symbol; /// Holds all of the coverage mapping data associated with a function instance, /// collected during traversal of `Coverage` statements in the function's MIR. @@ -189,6 +190,11 @@ impl<'tcx> FunctionCoverage<'tcx> { if self.is_used { self.function_coverage_info.function_source_hash } else { 0 } } + /// Returns an iterator over all filenames used by this function's mappings. + pub(crate) fn all_file_names(&self) -> impl Iterator + Captures<'_> { + self.function_coverage_info.mappings.iter().map(|mapping| mapping.code_region.file_name) + } + /// Convert this function's coverage expression data into a form that can be /// passed through FFI to LLVM. pub(crate) fn counter_expressions( diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 965be3f05eea7..48cef4594768f 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -4,6 +4,7 @@ use crate::coverageinfo::ffi::CounterMappingRegion; use crate::coverageinfo::map_data::{FunctionCoverage, FunctionCoverageCollector}; use crate::llvm; +use itertools::Itertools as _; use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods}; use rustc_data_structures::fx::FxIndexSet; use rustc_hir::def::DefKind; @@ -57,12 +58,18 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { return; } - let mut global_file_table = GlobalFileTable::new(tcx); + let function_coverage_entries = function_coverage_map + .into_iter() + .map(|(instance, function_coverage)| (instance, function_coverage.into_finished())) + .collect::>(); + + let all_file_names = + function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names()); + let global_file_table = GlobalFileTable::new(all_file_names); // Encode coverage mappings and generate function records let mut function_data = Vec::new(); - for (instance, function_coverage) in function_coverage_map { - let function_coverage = function_coverage.into_finished(); + for (instance, function_coverage) in function_coverage_entries { debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); let mangled_function_name = tcx.symbol_name(instance).name; @@ -70,7 +77,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { let is_used = function_coverage.is_used(); let coverage_mapping_buffer = - encode_mappings_for_function(&mut global_file_table, &function_coverage); + encode_mappings_for_function(&global_file_table, &function_coverage); if coverage_mapping_buffer.is_empty() { if function_coverage.is_used() { @@ -88,7 +95,7 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { } // Encode all filenames referenced by counters/expressions in this module - let filenames_buffer = global_file_table.into_filenames_buffer(); + let filenames_buffer = global_file_table.make_filenames_buffer(tcx); let filenames_size = filenames_buffer.len(); let filenames_val = cx.const_bytes(&filenames_buffer); @@ -139,37 +146,48 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { coverageinfo::save_cov_data_to_mod(cx, cov_data_val); } +/// Maps "global" (per-CGU) file ID numbers to their underlying filenames. struct GlobalFileTable { - global_file_table: FxIndexSet, + /// This "raw" table doesn't include the working dir, so a filename's + /// global ID is its index in this set **plus one**. + raw_file_table: FxIndexSet, } impl GlobalFileTable { - fn new(tcx: TyCtxt<'_>) -> Self { - let mut global_file_table = FxIndexSet::default(); + fn new(all_file_names: impl IntoIterator) -> Self { + // Collect all of the filenames into a set. Filenames usually come in + // contiguous runs, so we can dedup adjacent ones to save work. + let mut raw_file_table = all_file_names.into_iter().dedup().collect::>(); + + // Sort the file table by its actual string values, not the arbitrary + // ordering of its symbols. + raw_file_table.sort_unstable_by(|a, b| a.as_str().cmp(b.as_str())); + + Self { raw_file_table } + } + + fn global_file_id_for_file_name(&self, file_name: Symbol) -> u32 { + let raw_id = self.raw_file_table.get_index_of(&file_name).unwrap_or_else(|| { + bug!("file name not found in prepared global file table: {file_name}"); + }); + // The raw file table doesn't include an entry for the working dir + // (which has ID 0), so add 1 to get the correct ID. + (raw_id + 1) as u32 + } + + fn make_filenames_buffer(&self, tcx: TyCtxt<'_>) -> Vec { // LLVM Coverage Mapping Format version 6 (zero-based encoded as 5) // requires setting the first filename to the compilation directory. // Since rustc generates coverage maps with relative paths, the // compilation directory can be combined with the relative paths // to get absolute paths, if needed. use rustc_session::RemapFileNameExt; - let working_dir = - Symbol::intern(&tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy()); - global_file_table.insert(working_dir); - Self { global_file_table } - } - - fn global_file_id_for_file_name(&mut self, file_name: Symbol) -> u32 { - let (global_file_id, _) = self.global_file_table.insert_full(file_name); - global_file_id as u32 - } - - fn into_filenames_buffer(self) -> Vec { - // This method takes `self` so that the caller can't accidentally - // modify the original file table after encoding it into a buffer. + let working_dir: &str = &tcx.sess.opts.working_dir.for_codegen(&tcx.sess).to_string_lossy(); llvm::build_byte_buffer(|buffer| { coverageinfo::write_filenames_section_to_buffer( - self.global_file_table.iter().map(Symbol::as_str), + // Insert the working dir at index 0, before the other filenames. + std::iter::once(working_dir).chain(self.raw_file_table.iter().map(Symbol::as_str)), buffer, ); }) @@ -182,7 +200,7 @@ impl GlobalFileTable { /// /// Newly-encountered filenames will be added to the global file table. fn encode_mappings_for_function( - global_file_table: &mut GlobalFileTable, + global_file_table: &GlobalFileTable, function_coverage: &FunctionCoverage<'_>, ) -> Vec { let mut counter_regions = function_coverage.counter_regions().collect::>(); @@ -203,7 +221,7 @@ fn encode_mappings_for_function( for counter_regions_for_file in counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name) { - // Look up (or allocate) the global file ID for this filename. + // Look up the global file ID for this filename. let file_name = counter_regions_for_file[0].1.file_name; let global_file_id = global_file_table.global_file_id_for_file_name(file_name); From 88159cafa7444c6ec9cc521c1f8128d5ab777d29 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 28 Sep 2023 22:36:40 +1000 Subject: [PATCH 4/6] coverage: Encapsulate local-to-global file mappings --- .../src/coverageinfo/mapgen.rs | 33 ++++++++++++++++--- compiler/rustc_codegen_llvm/src/lib.rs | 1 + 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 48cef4594768f..7dc6339271f50 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -194,6 +194,29 @@ impl GlobalFileTable { } } +rustc_index::newtype_index! { + // Tell the newtype macro to not generate `Encode`/`Decode` impls. + #[custom_encodable] + struct LocalFileId {} +} + +/// Holds a mapping from "local" (per-function) file IDs to "global" (per-CGU) +/// file IDs. +#[derive(Default)] +struct VirtualFileMapping { + local_to_global: IndexVec, +} + +impl VirtualFileMapping { + fn push_global_id(&mut self, global_file_id: u32) -> LocalFileId { + self.local_to_global.push(global_file_id) + } + + fn into_vec(self) -> Vec { + self.local_to_global.raw + } +} + /// Using the expressions and counter regions collected for a single function, /// generate the variable-sized payload of its corresponding `__llvm_covfun` /// entry. The payload is returned as a vector of bytes. @@ -210,7 +233,7 @@ fn encode_mappings_for_function( let expressions = function_coverage.counter_expressions().collect::>(); - let mut virtual_file_mapping = IndexVec::::new(); + let mut virtual_file_mapping = VirtualFileMapping::default(); let mut mapping_regions = Vec::with_capacity(counter_regions.len()); // Sort and group the list of (counter, region) mapping pairs by filename. @@ -226,8 +249,8 @@ fn encode_mappings_for_function( let global_file_id = global_file_table.global_file_id_for_file_name(file_name); // Associate that global file ID with a local file ID for this function. - let local_file_id: u32 = virtual_file_mapping.push(global_file_id); - debug!(" file id: local {local_file_id} => global {global_file_id} = '{file_name:?}'"); + let local_file_id = virtual_file_mapping.push_global_id(global_file_id); + debug!(" file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'"); // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. @@ -237,7 +260,7 @@ fn encode_mappings_for_function( debug!("Adding counter {counter:?} to map for {region:?}"); mapping_regions.push(CounterMappingRegion::code_region( counter, - local_file_id, + local_file_id.as_u32(), start_line, start_col, end_line, @@ -249,7 +272,7 @@ fn encode_mappings_for_function( // Encode the function's coverage mappings into a buffer. llvm::build_byte_buffer(|buffer| { coverageinfo::write_mapping_to_buffer( - virtual_file_mapping.raw, + virtual_file_mapping.into_vec(), expressions, mapping_regions, buffer, diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 7a390d35a2b9f..6607a0697cab4 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -12,6 +12,7 @@ #![feature(hash_raw_entry)] #![feature(iter_intersperse)] #![feature(let_chains)] +#![feature(min_specialization)] #![feature(never_type)] #![feature(slice_group_by)] #![feature(impl_trait_in_assoc_type)] From de4cfbca2ebec9c91037370146c518c069ece033 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Thu, 28 Sep 2023 22:36:40 +1000 Subject: [PATCH 5/6] coverage: Encode function mappings without re-sorting them The main change here is that `VirtualFileMapping` now uses an internal hashmap to de-duplicate incoming global file IDs. That removes the need for `encode_mappings_for_function` to re-sort its mappings by filename in order to de-duplicate them. (We still de-duplicate runs of identical filenames to save work, but this is not load-bearing for correctness, so a sort is not necessary.) --- .../src/coverageinfo/mapgen.rs | 26 ++++++++++--------- compiler/rustc_codegen_llvm/src/lib.rs | 2 +- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index 7dc6339271f50..daf5d06110ac8 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -6,7 +6,7 @@ use crate::llvm; use itertools::Itertools as _; use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods}; -use rustc_data_structures::fx::FxIndexSet; +use rustc_data_structures::fx::{FxIndexMap, FxIndexSet}; use rustc_hir::def::DefKind; use rustc_hir::def_id::DefId; use rustc_index::IndexVec; @@ -205,11 +205,15 @@ rustc_index::newtype_index! { #[derive(Default)] struct VirtualFileMapping { local_to_global: IndexVec, + global_to_local: FxIndexMap, } impl VirtualFileMapping { - fn push_global_id(&mut self, global_file_id: u32) -> LocalFileId { - self.local_to_global.push(global_file_id) + fn local_id_for_global(&mut self, global_file_id: u32) -> LocalFileId { + *self + .global_to_local + .entry(global_file_id) + .or_insert_with(|| self.local_to_global.push(global_file_id)) } fn into_vec(self) -> Vec { @@ -226,7 +230,7 @@ fn encode_mappings_for_function( global_file_table: &GlobalFileTable, function_coverage: &FunctionCoverage<'_>, ) -> Vec { - let mut counter_regions = function_coverage.counter_regions().collect::>(); + let counter_regions = function_coverage.counter_regions(); if counter_regions.is_empty() { return Vec::new(); } @@ -236,25 +240,23 @@ fn encode_mappings_for_function( let mut virtual_file_mapping = VirtualFileMapping::default(); let mut mapping_regions = Vec::with_capacity(counter_regions.len()); - // Sort and group the list of (counter, region) mapping pairs by filename. - // (Preserve any further ordering imposed by `FunctionCoverage`.) + // Group mappings into runs with the same filename, preserving the order + // yielded by `FunctionCoverage`. // Prepare file IDs for each filename, and prepare the mapping data so that // we can pass it through FFI to LLVM. - counter_regions.sort_by_key(|(_counter, region)| region.file_name); - for counter_regions_for_file in - counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name) + for (file_name, counter_regions_for_file) in + &counter_regions.group_by(|(_counter, region)| region.file_name) { // Look up the global file ID for this filename. - let file_name = counter_regions_for_file[0].1.file_name; let global_file_id = global_file_table.global_file_id_for_file_name(file_name); // Associate that global file ID with a local file ID for this function. - let local_file_id = virtual_file_mapping.push_global_id(global_file_id); + let local_file_id = virtual_file_mapping.local_id_for_global(global_file_id); debug!(" file id: {local_file_id:?} => global {global_file_id} = '{file_name:?}'"); // For each counter/region pair in this function+file, convert it to a // form suitable for FFI. - for &(counter, region) in counter_regions_for_file { + for (counter, region) in counter_regions_for_file { let CodeRegion { file_name: _, start_line, start_col, end_line, end_col } = *region; debug!("Adding counter {counter:?} to map for {region:?}"); diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 6607a0697cab4..8a6a5f79b3bb9 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -8,13 +8,13 @@ #![cfg_attr(not(bootstrap), feature(rustdoc_internals))] #![cfg_attr(not(bootstrap), doc(rust_logo))] #![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")] +#![feature(exact_size_is_empty)] #![feature(extern_types)] #![feature(hash_raw_entry)] #![feature(iter_intersperse)] #![feature(let_chains)] #![feature(min_specialization)] #![feature(never_type)] -#![feature(slice_group_by)] #![feature(impl_trait_in_assoc_type)] #![recursion_limit = "256"] #![allow(rustc::potential_query_instability)] From 6af9fef08590fd499370a2f6cbbae9ceacf15336 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Fri, 6 Oct 2023 23:53:23 +1100 Subject: [PATCH 6/6] coverage: Emit the filenames section before encoding per-function mappings Most coverage metadata is encoded into two sections in the final executable. The `__llvm_covmap` section mostly just contains a list of filenames, while the `__llvm_covfun` section contains encoded coverage maps for each instrumented function. The catch is that each per-function record also needs to contain a hash of the filenames list that it refers to. Historically this was handled by assembling most of the per-function data into a temporary list, then assembling the filenames buffer, then using the filenames hash to emit the per-function data, and then finally emitting the filenames table itself. However, now that we build the filenames table up-front (via a separate traversal of the per-function data), we can hash and emit that part first, and then emit each of the per-function records immediately after building. This removes the awkwardness of having to temporarily store nearly-complete per-function records. --- .../src/coverageinfo/mapgen.rs | 36 ++++++++----------- .../coverage-llvmir/filecheck.testprog.txt | 6 ++-- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs index daf5d06110ac8..274e0aeaaba4f 100644 --- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs +++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs @@ -67,8 +67,22 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { function_coverage_entries.iter().flat_map(|(_, fn_cov)| fn_cov.all_file_names()); let global_file_table = GlobalFileTable::new(all_file_names); + // Encode all filenames referenced by coverage mappings in this CGU. + let filenames_buffer = global_file_table.make_filenames_buffer(tcx); + + let filenames_size = filenames_buffer.len(); + let filenames_val = cx.const_bytes(&filenames_buffer); + let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); + + // Generate the coverage map header, which contains the filenames used by + // this CGU's coverage mappings, and store it in a well-known global. + let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); + coverageinfo::save_cov_data_to_mod(cx, cov_data_val); + + let mut unused_function_names = Vec::new(); + let covfun_section_name = coverageinfo::covfun_section_name(cx); + // Encode coverage mappings and generate function records - let mut function_data = Vec::new(); for (instance, function_coverage) in function_coverage_entries { debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance); @@ -91,23 +105,6 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { } } - function_data.push((mangled_function_name, source_hash, is_used, coverage_mapping_buffer)); - } - - // Encode all filenames referenced by counters/expressions in this module - let filenames_buffer = global_file_table.make_filenames_buffer(tcx); - - let filenames_size = filenames_buffer.len(); - let filenames_val = cx.const_bytes(&filenames_buffer); - let filenames_ref = coverageinfo::hash_bytes(&filenames_buffer); - - // Generate the LLVM IR representation of the coverage map and store it in a well-known global - let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); - - let mut unused_function_names = Vec::new(); - - let covfun_section_name = coverageinfo::covfun_section_name(cx); - for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data { if !is_used { unused_function_names.push(mangled_function_name); } @@ -141,9 +138,6 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) { llvm::set_linkage(array, llvm::Linkage::InternalLinkage); llvm::set_initializer(array, initializer); } - - // Save the coverage data value to LLVM IR - coverageinfo::save_cov_data_to_mod(cx, cov_data_val); } /// Maps "global" (per-CGU) file ID numbers to their underlying filenames. diff --git a/tests/run-make/coverage-llvmir/filecheck.testprog.txt b/tests/run-make/coverage-llvmir/filecheck.testprog.txt index 9d63fabd788b4..8ab18da21a200 100644 --- a/tests/run-make/coverage-llvmir/filecheck.testprog.txt +++ b/tests/run-make/coverage-llvmir/filecheck.testprog.txt @@ -3,12 +3,12 @@ WINDOWS: $__llvm_profile_runtime_user = comdat any -CHECK: @__covrec_{{[A-F0-9]+}}u = linkonce_odr hidden constant -CHECK-SAME: section "[[INSTR_PROF_COVFUN]]"[[COMDAT_IF_SUPPORTED]], align 8 - CHECK: @__llvm_coverage_mapping = private constant CHECK-SAME: section "[[INSTR_PROF_COVMAP]]", align 8 +CHECK: @__covrec_{{[A-F0-9]+}}u = linkonce_odr hidden constant +CHECK-SAME: section "[[INSTR_PROF_COVFUN]]"[[COMDAT_IF_SUPPORTED]], align 8 + WINDOWS: @__llvm_profile_runtime = external{{.*}}global i32 CHECK: @__profc__R{{[a-zA-Z0-9_]+}}testprog14will_be_called = {{private|internal}} global