Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

coverage: Move most per-function coverage info into mir::Body #116046

Merged
merged 10 commits into from
Oct 18, 2023
Merged
23 changes: 6 additions & 17 deletions compiler/rustc_codegen_llvm/src/coverageinfo/ffi.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use rustc_middle::mir::coverage::{CounterId, ExpressionId, Operand};
use rustc_middle::mir::coverage::{CounterId, CovTerm, ExpressionId};

/// Must match the layout of `LLVMRustCounterKind`.
#[derive(Copy, Clone, Debug)]
Expand Down Expand Up @@ -43,11 +43,11 @@ impl Counter {
Self { kind: CounterKind::Expression, id: expression_id.as_u32() }
}

pub(crate) fn from_operand(operand: Operand) -> Self {
match operand {
Operand::Zero => Self::ZERO,
Operand::Counter(id) => Self::counter_value_reference(id),
Operand::Expression(id) => Self::expression(id),
pub(crate) fn from_term(term: CovTerm) -> Self {
match term {
CovTerm::Zero => Self::ZERO,
CovTerm::Counter(id) => Self::counter_value_reference(id),
CovTerm::Expression(id) => Self::expression(id),
}
}
}
Expand All @@ -73,17 +73,6 @@ pub struct CounterExpression {
pub rhs: Counter,
}

impl CounterExpression {
/// The dummy expression `(0 - 0)` has a representation of all zeroes,
/// making it marginally more efficient to initialize than `(0 + 0)`.
pub(crate) const DUMMY: Self =
Self { lhs: Counter::ZERO, kind: ExprKind::Subtract, rhs: Counter::ZERO };

pub fn new(lhs: Counter, kind: ExprKind, rhs: Counter) -> Self {
Self { kind, lhs, rhs }
}
}

/// Corresponds to enum `llvm::coverage::CounterMappingRegion::RegionKind`.
///
/// Must match the layout of `LLVMRustCounterMappingRegionKind`.
Expand Down
344 changes: 153 additions & 191 deletions compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs

Large diffs are not rendered by default.

30 changes: 13 additions & 17 deletions compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,8 @@ use rustc_hir::def::DefKind;
use rustc_hir::def_id::DefId;
use rustc_index::IndexVec;
use rustc_middle::bug;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::coverage::CodeRegion;
use rustc_middle::ty::TyCtxt;
use rustc_middle::ty::{self, TyCtxt};
use rustc_span::Symbol;

/// Generates and exports the Coverage Map.
Expand Down Expand Up @@ -60,10 +59,8 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {

// Encode coverage mappings and generate function records
let mut function_data = Vec::new();
for (instance, mut function_coverage) in function_coverage_map {
for (instance, function_coverage) in function_coverage_map {
debug!("Generate function coverage for {}, {:?}", cx.codegen_unit.name(), instance);
function_coverage.simplify_expressions();
let function_coverage = function_coverage;

let mangled_function_name = tcx.symbol_name(instance).name;
let source_hash = function_coverage.source_hash();
Expand Down Expand Up @@ -170,10 +167,11 @@ fn encode_mappings_for_function(
let mut virtual_file_mapping = IndexVec::<u32, u32>::new();
let mut mapping_regions = Vec::with_capacity(counter_regions.len());

// Sort the list of (counter, region) mapping pairs by region, so that they
// can be grouped by filename. Prepare file IDs for each filename, and
// prepare the mapping data so that we can pass it through FFI to LLVM.
counter_regions.sort_by_key(|(_counter, region)| *region);
// Sort and group the list of (counter, region) mapping pairs by filename.
// (Preserve any further ordering imposed by `FunctionCoverage`.)
// Prepare file IDs for each filename, and prepare the mapping data so that
// we can pass it through FFI to LLVM.
counter_regions.sort_by_key(|(_counter, region)| region.file_name);
for counter_regions_for_file in
counter_regions.group_by(|(_, a), (_, b)| a.file_name == b.file_name)
{
Expand Down Expand Up @@ -331,16 +329,14 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
for non_codegenned_def_id in
eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id))
{
let codegen_fn_attrs = tcx.codegen_fn_attrs(non_codegenned_def_id);

// If a function is marked `#[coverage(off)]`, then skip generating a
// dead code stub for it.
if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NO_COVERAGE) {
debug!("skipping unused fn marked #[coverage(off)]: {:?}", non_codegenned_def_id);
// Skip any function that didn't have coverage data added to it by the
// coverage instrumentor.
let body = tcx.instance_mir(ty::InstanceDef::Item(non_codegenned_def_id));
let Some(function_coverage_info) = body.function_coverage_info.as_deref() else {
continue;
}
};

debug!("generating unused fn: {:?}", non_codegenned_def_id);
cx.define_unused_fn(non_codegenned_def_id);
cx.define_unused_fn(non_codegenned_def_id, function_coverage_info);
}
}
74 changes: 43 additions & 31 deletions compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_llvm::RustString;
use rustc_middle::bug;
use rustc_middle::mir::coverage::{CounterId, CoverageKind};
use rustc_middle::mir::coverage::{CounterId, CoverageKind, FunctionCoverageInfo};
use rustc_middle::mir::Coverage;
use rustc_middle::ty;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
Expand Down Expand Up @@ -88,56 +88,72 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
/// For used/called functions, the coverageinfo was already added to the
/// `function_coverage_map` (keyed by function `Instance`) during codegen.
/// But in this case, since the unused function was _not_ previously
/// codegenned, collect the coverage `CodeRegion`s from the MIR and add
/// them. Since the function is never called, all of its `CodeRegion`s can be
/// added as `unreachable_region`s.
fn define_unused_fn(&self, def_id: DefId) {
/// codegenned, collect the function coverage info from MIR and add an
/// "unused" entry to the function coverage map.
fn define_unused_fn(&self, def_id: DefId, function_coverage_info: &'tcx FunctionCoverageInfo) {
let instance = declare_unused_fn(self, def_id);
codegen_unused_fn_and_counter(self, instance);
add_unused_function_coverage(self, instance, def_id);
add_unused_function_coverage(self, instance, function_coverage_info);
}
}

impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
#[instrument(level = "debug", skip(self))]
fn add_coverage(&mut self, instance: Instance<'tcx>, coverage: &Coverage) {
// Our caller should have already taken care of inlining subtleties,
// so we can assume that counter/expression IDs in this coverage
// statement are meaningful for the given instance.
//
// (Either the statement was not inlined and directly belongs to this
// instance, or it was inlined *from* this instance.)

let bx = self;

let Some(function_coverage_info) =
bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref()
else {
debug!("function has a coverage statement but no coverage info");
return;
};

let Some(coverage_context) = bx.coverage_context() else { return };
let mut coverage_map = coverage_context.function_coverage_map.borrow_mut();
let func_coverage = coverage_map
.entry(instance)
.or_insert_with(|| FunctionCoverage::new(bx.tcx(), instance));
.or_insert_with(|| FunctionCoverage::new(instance, function_coverage_info));

let Coverage { kind, code_regions } = coverage;
let Coverage { kind } = coverage;
match *kind {
CoverageKind::Counter { function_source_hash, id } => {
debug!(
"ensuring function source hash is set for instance={:?}; function_source_hash={}",
instance, function_source_hash,
);
func_coverage.set_function_source_hash(function_source_hash);
func_coverage.add_counter(id, code_regions);
CoverageKind::CounterIncrement { id } => {
func_coverage.mark_counter_id_seen(id);
// We need to explicitly drop the `RefMut` before calling into `instrprof_increment`,
// as that needs an exclusive borrow.
drop(coverage_map);

let coverageinfo = bx.tcx().coverageinfo(instance.def);
// The number of counters passed to `llvm.instrprof.increment` might
// be smaller than the number originally inserted by the instrumentor,
// if some high-numbered counters were removed by MIR optimizations.
// If so, LLVM's profiler runtime will use fewer physical counters.
let num_counters =
bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
assert!(
num_counters as usize <= function_coverage_info.num_counters,
"num_counters disagreement: query says {num_counters} but function info only has {}",
function_coverage_info.num_counters
);

let fn_name = bx.get_pgo_func_name_var(instance);
let hash = bx.const_u64(function_source_hash);
let num_counters = bx.const_u32(coverageinfo.num_counters);
let hash = bx.const_u64(function_coverage_info.function_source_hash);
let num_counters = bx.const_u32(num_counters);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we use function_coverage_info.num_counters and remove the query altogether?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had originally wanted to do that, but eventually decided against it, for two reasons:

  • LLVM uses this number to determine how many physical counter variables to create in memory and in .profraw files. So in cases where high-numbered counters are removed by MIR opts, this reduces the overall size of the profile data. Removing the query now would make profile data larger than before, in cases where it's currently making a difference.
  • In the future, I want to improve this to get rid of all of the unused counters (instead of just the high-numbered ones), by renumbering the used counters to get rid of holes. If I remove the coverage query plumbing now, I'll just have to add it all back again later.

let index = bx.const_u32(id.as_u32());
debug!(
"codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?}, index={:?})",
fn_name, hash, num_counters, index,
);
bx.instrprof_increment(fn_name, hash, num_counters, index);
}
CoverageKind::Expression { id, lhs, op, rhs } => {
func_coverage.add_counter_expression(id, lhs, op, rhs, code_regions);
}
CoverageKind::Unreachable => {
func_coverage.add_unreachable_regions(code_regions);
CoverageKind::ExpressionUsed { id } => {
func_coverage.mark_expression_id_seen(id);
}
}
}
Expand Down Expand Up @@ -200,15 +216,11 @@ fn codegen_unused_fn_and_counter<'tcx>(cx: &CodegenCx<'_, 'tcx>, instance: Insta
fn add_unused_function_coverage<'tcx>(
cx: &CodegenCx<'_, 'tcx>,
instance: Instance<'tcx>,
def_id: DefId,
function_coverage_info: &'tcx FunctionCoverageInfo,
) {
let tcx = cx.tcx;

let mut function_coverage = FunctionCoverage::unused(tcx, instance);
for &code_region in tcx.covered_code_regions(def_id) {
let code_region = std::slice::from_ref(code_region);
function_coverage.add_unreachable_regions(code_region);
}
// An unused function's mappings will automatically be rewritten to map to
// zero, because none of its counters/expressions are marked as seen.
let function_coverage = FunctionCoverage::unused(instance, function_coverage_info);

if let Some(coverage_context) = cx.coverage_context() {
coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage);
Expand Down
105 changes: 72 additions & 33 deletions compiler/rustc_middle/src/mir/coverage.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Metadata from source code coverage analysis and instrumentation.

use rustc_index::IndexVec;
use rustc_macros::HashStable;
use rustc_span::Symbol;

Expand All @@ -8,6 +9,11 @@ use std::fmt::{self, Debug, Formatter};
rustc_index::newtype_index! {
/// ID of a coverage counter. Values ascend from 0.
///
/// Before MIR inlining, counter IDs are local to their enclosing function.
/// After MIR inlining, coverage statements may have been inlined into
/// another function, so use the statement's source-scope to find which
/// function/instance its IDs are meaningful for.
///
/// Note that LLVM handles counter IDs as `uint32_t`, so there is no need
/// to use a larger representation on the Rust side.
#[derive(HashStable)]
Expand All @@ -23,6 +29,11 @@ impl CounterId {
rustc_index::newtype_index! {
/// ID of a coverage-counter expression. Values ascend from 0.
///
/// Before MIR inlining, expression IDs are local to their enclosing function.
/// After MIR inlining, coverage statements may have been inlined into
/// another function, so use the statement's source-scope to find which
/// function/instance its IDs are meaningful for.
///
/// Note that LLVM handles expression IDs as `uint32_t`, so there is no need
/// to use a larger representation on the Rust side.
#[derive(HashStable)]
Expand All @@ -35,19 +46,21 @@ impl ExpressionId {
pub const START: Self = Self::from_u32(0);
}

/// Operand of a coverage-counter expression.
/// Enum that can hold a constant zero value, the ID of an physical coverage
/// counter, or the ID of a coverage-counter expression.
///
/// Operands can be a constant zero value, an actual coverage counter, or another
/// expression. Counter/expression operands are referred to by ID.
/// This was originally only used for expression operands (and named `Operand`),
/// but the zero/counter/expression distinction is also useful for representing
/// the value of code/gap mappings, and the true/false arms of branch mappings.
#[derive(Copy, Clone, PartialEq, Eq)]
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
pub enum Operand {
pub enum CovTerm {
Zero,
Counter(CounterId),
Expression(ExpressionId),
}

impl Debug for Operand {
impl Debug for CovTerm {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
match self {
Self::Zero => write!(f, "Zero"),
Expand All @@ -59,40 +72,31 @@ impl Debug for Operand {

#[derive(Clone, PartialEq, TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
pub enum CoverageKind {
Counter {
function_source_hash: u64,
/// ID of this counter within its enclosing function.
/// Expressions in the same function can refer to it as an operand.
id: CounterId,
},
Expression {
/// ID of this coverage-counter expression within its enclosing function.
/// Other expressions in the same function can refer to it as an operand.
id: ExpressionId,
lhs: Operand,
op: Op,
rhs: Operand,
},
Unreachable,
/// Marks the point in MIR control flow represented by a coverage counter.
///
/// This is eventually lowered to `llvm.instrprof.increment` in LLVM IR.
///
/// If this statement does not survive MIR optimizations, any mappings that
/// refer to this counter can have those references simplified to zero.
CounterIncrement { id: CounterId },

/// Marks the point in MIR control-flow represented by a coverage expression.
///
/// If this statement does not survive MIR optimizations, any mappings that
/// refer to this expression can have those references simplified to zero.
///
/// (This is only inserted for expression IDs that are directly used by
/// mappings. Intermediate expressions with no direct mappings are
/// retained/zeroed based on whether they are transitively used.)
ExpressionUsed { id: ExpressionId },
}

impl Debug for CoverageKind {
fn fmt(&self, fmt: &mut Formatter<'_>) -> fmt::Result {
use CoverageKind::*;
match self {
Counter { id, .. } => write!(fmt, "Counter({:?})", id.index()),
Expression { id, lhs, op, rhs } => write!(
fmt,
"Expression({:?}) = {:?} {} {:?}",
id.index(),
lhs,
match op {
Op::Add => "+",
Op::Subtract => "-",
},
rhs,
),
Unreachable => write!(fmt, "Unreachable"),
CounterIncrement { id } => write!(fmt, "CounterIncrement({:?})", id.index()),
ExpressionUsed { id } => write!(fmt, "ExpressionUsed({:?})", id.index()),
}
}
}
Expand Down Expand Up @@ -133,3 +137,38 @@ impl Op {
matches!(self, Self::Subtract)
}
}

#[derive(Clone, Debug)]
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
pub struct Expression {
pub lhs: CovTerm,
pub op: Op,
pub rhs: CovTerm,
}

#[derive(Clone, Debug)]
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
pub struct Mapping {
pub code_region: CodeRegion,

/// Indicates whether this mapping uses a counter value, expression value,
/// or zero value.
///
/// FIXME: When we add support for mapping kinds other than `Code`
/// (e.g. branch regions, expansion regions), replace this with a dedicated
/// mapping-kind enum.
pub term: CovTerm,
}

/// Stores per-function coverage information attached to a `mir::Body`,
/// to be used in conjunction with the individual coverage statements injected
/// into the function's basic blocks.
#[derive(Clone, Debug)]
#[derive(TyEncodable, TyDecodable, Hash, HashStable, TypeFoldable, TypeVisitable)]
pub struct FunctionCoverageInfo {
pub function_source_hash: u64,
pub num_counters: usize,

pub expressions: IndexVec<ExpressionId, Expression>,
pub mappings: Vec<Mapping>,
}
Loading
Loading