Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Rework instantiation mode selection in monomorphization #128118

Draft
wants to merge 5 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion compiler/rustc_codegen_gcc/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn from_fn_attrs<'gcc, 'tcx>(
let inline = if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
InlineAttr::Never
} else if codegen_fn_attrs.inline == InlineAttr::None
&& instance.def.requires_inline(cx.tcx)
&& cx.tcx.cross_crate_inlinable(instance.def_id())
{
InlineAttr::Hint
} else {
Expand Down
13 changes: 7 additions & 6 deletions compiler/rustc_codegen_llvm/src/attributes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -348,12 +348,13 @@ pub fn from_fn_attrs<'ll, 'tcx>(
OptimizeAttr::Speed => {}
}

let inline =
if codegen_fn_attrs.inline == InlineAttr::None && instance.def.requires_inline(cx.tcx) {
InlineAttr::Hint
} else {
codegen_fn_attrs.inline
};
let inline = if codegen_fn_attrs.inline == InlineAttr::None
&& cx.tcx.cross_crate_inlinable(instance.def_id())
{
InlineAttr::Hint
} else {
codegen_fn_attrs.inline
};
to_add.extend(inline_attr(cx, inline));

// The `uwtable` attribute according to LLVM is:
Expand Down
4 changes: 1 addition & 3 deletions compiler/rustc_codegen_ssa/src/back/symbol_export.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,7 @@ fn reachable_non_generics_provider(tcx: TyCtxt<'_>, _: LocalCrate) -> DefIdMap<S
// Functions marked with #[inline] are codegened with "internal"
// linkage and are not exported unless marked with an extern
// indicator
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx)
|| tcx.codegen_fn_attrs(def_id.to_def_id()).contains_extern_indicator()
{
if !Instance::mono(tcx, def_id.to_def_id()).def.generates_cgu_internal_copy(tcx) {
Some(def_id)
} else {
None
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_metadata/src/rmeta/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1321,7 +1321,10 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
}

fn cross_crate_inlinable(self, id: DefIndex) -> bool {
self.root.tables.cross_crate_inlinable.get(self, id)
self.root.tables.cross_crate_inlinable.get(self, id).unwrap_or_else(|| {
debug!("cross_crate_inlinable missing for {id:?}");
false
})
}

fn get_fn_has_self_parameter(self, id: DefIndex, sess: &'a Session) -> bool {
Expand Down
49 changes: 42 additions & 7 deletions compiler/rustc_metadata/src/rmeta/encoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1656,19 +1656,50 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let tcx = self.tcx;
let reachable_set = tcx.reachable_set(());

let keys_and_jobs = tcx.mir_keys(()).iter().filter_map(|&def_id| {
let lang_items = tcx.lang_items();
for (trait_def_id, symbol) in [
(lang_items.clone_trait(), sym::clone),
(lang_items.fn_once_trait(), sym::call_once),
(lang_items.fn_mut_trait(), sym::call_mut),
(lang_items.fn_trait(), sym::call),
] {
if let Some(trait_def_id) = trait_def_id {
let fn_def_id = tcx
.associated_items(trait_def_id)
.filter_by_name_unhygienic(symbol)
.next()
.unwrap()
.def_id;
if fn_def_id.is_local() {
self.tables
.cross_crate_inlinable
.set(fn_def_id.index, Some(self.tcx.cross_crate_inlinable(fn_def_id)));
}
}
}

for (symbol, _) in tcx.exported_symbols(LOCAL_CRATE) {
use crate::rmeta::ExportedSymbol::*;
let (NonGeneric(def_id) | Generic(def_id, _) | ThreadLocalShim(def_id)) = symbol else {
continue;
};
self.tables.cross_crate_inlinable.set(def_id.index, Some(false));
}

for def_id in tcx.mir_keys(()).iter().copied() {
self.tables
.cross_crate_inlinable
.set(def_id.to_def_id().index, Some(self.tcx.cross_crate_inlinable(def_id)));
let (encode_const, encode_opt) = should_encode_mir(tcx, reachable_set, def_id);
if encode_const || encode_opt { Some((def_id, encode_const, encode_opt)) } else { None }
});
for (def_id, encode_const, encode_opt) in keys_and_jobs {
if encode_const || encode_opt {
} else {
continue;
}
debug_assert!(encode_const || encode_opt);

debug!("EntryBuilder::encode_mir({:?})", def_id);
if encode_opt {
record!(self.tables.optimized_mir[def_id.to_def_id()] <- tcx.optimized_mir(def_id));
self.tables
.cross_crate_inlinable
.set(def_id.to_def_id().index, self.tcx.cross_crate_inlinable(def_id));
record!(self.tables.closure_saved_names_of_captured_variables[def_id.to_def_id()]
<- tcx.closure_saved_names_of_captured_variables(def_id));

Expand Down Expand Up @@ -1710,6 +1741,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
self.tables.unused_generic_params.set(def_id.local_def_index, unused);
}

if let Some(def_id) = tcx.lang_items().drop_in_place_fn() {
self.tables.cross_crate_inlinable.set(def_id.index, Some(false));
}

// Encode all the deduced parameter attributes for everything that has MIR, even for items
// that can't be inlined. But don't if we aren't optimizing in non-incremental mode, to
// save the query traffic.
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_metadata/src/rmeta/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ define_tables! {
// That's why the encoded list needs to contain `ModChild` structures describing all the names
// individually instead of `DefId`s.
module_children_reexports: Table<DefIndex, LazyArray<ModChild>>,
cross_crate_inlinable: Table<DefIndex, bool>,
cross_crate_inlinable: Table<DefIndex, Option<bool>>,

- optional:
attributes: Table<DefIndex, LazyArray<ast::Attribute>>,
Expand Down
44 changes: 12 additions & 32 deletions compiler/rustc_middle/src/mir/mono.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::dep_graph::{DepNode, WorkProduct, WorkProductId};
use crate::ty::{GenericArgs, Instance, InstanceKind, SymbolName, TyCtxt};
use rustc_attr::InlineAttr;
use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE;
Expand All @@ -13,7 +12,6 @@ use rustc_hir::ItemId;
use rustc_index::Idx;
use rustc_macros::{HashStable, TyDecodable, TyEncodable};
use rustc_query_system::ich::StableHashingContext;
use rustc_session::config::OptLevel;
use rustc_span::symbol::Symbol;
use rustc_span::Span;
use std::fmt;
Expand Down Expand Up @@ -105,41 +103,23 @@ impl<'tcx> MonoItem<'tcx> {
}

pub fn instantiation_mode(&self, tcx: TyCtxt<'tcx>) -> InstantiationMode {
let generate_cgu_internal_copies = tcx
.sess
.opts
.unstable_opts
.inline_in_all_cgus
.unwrap_or_else(|| tcx.sess.opts.optimize != OptLevel::No)
&& !tcx.sess.link_dead_code();

if tcx.sess.link_dead_code() {
return InstantiationMode::GloballyShared { may_conflict: false };
}
match *self {
MonoItem::Fn(ref instance) => {
let entry_def_id = tcx.entry_fn(()).map(|(id, _)| id);
// If this function isn't inlined or otherwise has an extern
// indicator, then we'll be creating a globally shared version.
if tcx.codegen_fn_attrs(instance.def_id()).contains_extern_indicator()
|| !instance.def.generates_cgu_internal_copy(tcx)
|| Some(instance.def_id()) == entry_def_id
{
if Some(instance.def_id()) == entry_def_id {
return InstantiationMode::GloballyShared { may_conflict: false };
}

// At this point we don't have explicit linkage and we're an
// inlined function. If we're inlining into all CGUs then we'll
// be creating a local copy per CGU.
if generate_cgu_internal_copies {
return InstantiationMode::LocalCopy;
}

// Finally, if this is `#[inline(always)]` we're sure to respect
// that with an inline copy per CGU, but otherwise we'll be
// creating one copy of this `#[inline]` function which may
// conflict with upstream crates as it could be an exported
// symbol.
match tcx.codegen_fn_attrs(instance.def_id()).inline {
InlineAttr::Always => InstantiationMode::LocalCopy,
_ => InstantiationMode::GloballyShared { may_conflict: true },
if tcx.cross_crate_inlinable(instance.def_id()) {
if tcx.sess.opts.incremental.is_some() {
InstantiationMode::GloballyShared { may_conflict: true }
} else {
InstantiationMode::LocalCopy
}
} else {
InstantiationMode::GloballyShared { may_conflict: false }
}
}
MonoItem::Static(..) | MonoItem::GlobalAsm(..) => {
Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_middle/src/query/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2285,7 +2285,7 @@ rustc_queries! {
}

query cross_crate_inlinable(def_id: DefId) -> bool {
desc { "whether the item should be made inlinable across crates" }
desc { |tcx| "deciding whether `{}` should be inlinable across crates", tcx.def_path_str(key) }
separate_provide_extern
}
}
Expand Down
53 changes: 0 additions & 53 deletions compiler/rustc_middle/src/ty/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -286,26 +286,6 @@ impl<'tcx> InstanceKind<'tcx> {
tcx.get_attrs(self.def_id(), attr)
}

/// Returns `true` if the LLVM version of this instance is unconditionally
/// marked with `inline`. This implies that a copy of this instance is
/// generated in every codegen unit.
/// Note that this is only a hint. See the documentation for
/// `generates_cgu_internal_copy` for more information.
pub fn requires_inline(&self, tcx: TyCtxt<'tcx>) -> bool {
use rustc_hir::definitions::DefPathData;
let def_id = match *self {
ty::InstanceKind::Item(def) => def,
ty::InstanceKind::DropGlue(_, Some(_)) => return false,
ty::InstanceKind::AsyncDropGlueCtorShim(_, Some(_)) => return false,
ty::InstanceKind::ThreadLocalShim(_) => return false,
_ => return true,
};
matches!(
tcx.def_key(def_id).disambiguated_data.data,
DefPathData::Ctor | DefPathData::Closure
)
}

/// Returns `true` if the machine code for this instance is instantiated in
/// each codegen unit that references it.
/// Note that this is only a hint! The compiler can globally decide to *not*
Expand All @@ -314,39 +294,6 @@ impl<'tcx> InstanceKind<'tcx> {
/// `-Copt-level=0`) then the time for generating them is wasted and it's
/// better to create a single copy with external linkage.
pub fn generates_cgu_internal_copy(&self, tcx: TyCtxt<'tcx>) -> bool {
if self.requires_inline(tcx) {
return true;
}
if let ty::InstanceKind::DropGlue(.., Some(ty))
| ty::InstanceKind::AsyncDropGlueCtorShim(.., Some(ty)) = *self
{
// Drop glue generally wants to be instantiated at every codegen
// unit, but without an #[inline] hint. We should make this
// available to normal end-users.
if tcx.sess.opts.incremental.is_none() {
return true;
}
// When compiling with incremental, we can generate a *lot* of
// codegen units. Including drop glue into all of them has a
// considerable compile time cost.
//
// We include enums without destructors to allow, say, optimizing
// drops of `Option::None` before LTO. We also respect the intent of
// `#[inline]` on `Drop::drop` implementations.
return ty.ty_adt_def().map_or(true, |adt_def| {
match *self {
ty::InstanceKind::DropGlue(..) => adt_def.destructor(tcx).map(|dtor| dtor.did),
ty::InstanceKind::AsyncDropGlueCtorShim(..) => {
adt_def.async_destructor(tcx).map(|dtor| dtor.ctor)
}
_ => unreachable!(),
}
.map_or_else(|| adt_def.is_enum(), |did| tcx.cross_crate_inlinable(did))
});
}
if let ty::InstanceKind::ThreadLocalShim(..) = *self {
return false;
}
tcx.cross_crate_inlinable(self.def_id())
}

Expand Down
76 changes: 50 additions & 26 deletions compiler/rustc_mir_transform/src/cross_crate_inline.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::pass_manager as pm;
use rustc_attr::InlineAttr;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::LocalDefId;
use rustc_hir::LangItem;
use rustc_middle::mir::visit::Visitor;
use rustc_middle::mir::*;
use rustc_middle::query::Providers;
Expand All @@ -16,24 +17,34 @@ pub fn provide(providers: &mut Providers) {
}

fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
// Bail quickly for DefIds that wouldn't be inlinable anyway, such as statics.
if !matches!(
tcx.def_kind(def_id),
DefKind::Fn | DefKind::AssocFn | DefKind::Closure | DefKind::Ctor(..)
) {
return false;
}

// The program entrypoint is never inlinable in any sense.
if let Some((entry_fn, _)) = tcx.entry_fn(()) {
if def_id == entry_fn.expect_local() {
return false;
}
}

let codegen_fn_attrs = tcx.codegen_fn_attrs(def_id);
// If this has an extern indicator, then this function is globally shared and thus will not
// generate cgu-internal copies which would make it cross-crate inlinable.
if codegen_fn_attrs.contains_extern_indicator() {
return false;
}

// This just reproduces the logic from Instance::requires_inline.
match tcx.def_kind(def_id) {
DefKind::Ctor(..) | DefKind::Closure => return true,
DefKind::Fn | DefKind::AssocFn => {}
_ => return false,
}

// From this point on, it is valid to return true or false.
if tcx.sess.opts.unstable_opts.cross_crate_inline_threshold == InliningThreshold::Always {
return true;
}
// From this point on, it is technically valid to return true or false.
let threshold = match tcx.sess.opts.unstable_opts.cross_crate_inline_threshold {
InliningThreshold::Always => return true,
InliningThreshold::Sometimes(threshold) => threshold,
InliningThreshold::Never => return false,
};

if tcx.has_attr(def_id, sym::rustc_intrinsic) {
// Intrinsic fallback bodies are always cross-crate inlineable.
Expand All @@ -43,42 +54,55 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
return true;
}

// Don't do any inference when incremental compilation is enabled; the additional inlining that
// inference permits also creates more work for small edits.
if tcx.sess.opts.incremental.is_some() {
return false;
}

// Obey source annotations first; this is important because it means we can use
// #[inline(never)] to force code generation.
match codegen_fn_attrs.inline {
InlineAttr::Never => return false,
InlineAttr::Hint | InlineAttr::Always => return true,
_ => {}
}

// Don't do any inference when incremental compilation is enabled; the additional inlining that
// inference permits also creates more work for small edits.
if tcx.sess.opts.incremental.is_some() {
return false;
InlineAttr::None => {}
}

// Don't do any inference if codegen optimizations are disabled and also MIR inlining is not
// enabled. This ensures that we do inference even if someone only passes -Zinline-mir,
// which is less confusing than having to also enable -Copt-level=1.
if matches!(tcx.sess.opts.optimize, OptLevel::No) && !pm::should_run_pass(tcx, &inline::Inline)
{
return false;
if !matches!(tcx.def_kind(def_id), DefKind::Ctor(..)) {
return false;
}
}

if !tcx.is_mir_available(def_id) {
return false;
if tcx.is_lang_item(def_id.into(), LangItem::DropInPlace)
|| tcx.is_lang_item(def_id.into(), LangItem::AsyncDropInPlace)
{
return true;
}

let threshold = match tcx.sess.opts.unstable_opts.cross_crate_inline_threshold {
InliningThreshold::Always => return true,
InliningThreshold::Sometimes(threshold) => threshold,
InliningThreshold::Never => return false,
};
// If there is no MIR for the DefId, we can't analyze the body. But also, this only arises in
// two relevant cases: extern functions and MIR shims. So here we recognize the MIR shims by a
// DefId that has no MIR and whose parent is one of the shimmed traits.
// Everything else is extern functions, and thus not a candidate for inlining.
if !tcx.is_mir_available(def_id) {
let parent = tcx.parent(def_id.into());
match tcx.lang_items().from_def_id(parent.into()) {
Some(LangItem::Clone | LangItem::FnOnce | LangItem::Fn | LangItem::FnMut) => {
return true;
}
_ => return false,
}
}

let mir = tcx.optimized_mir(def_id);
let mut checker =
CostChecker { tcx, callee_body: mir, calls: 0, statements: 0, landing_pads: 0, resumes: 0 };
checker.visit_body(mir);

checker.calls == 0
&& checker.resumes == 0
&& checker.landing_pads == 0
Expand Down
Loading
Loading