From ab1764180a9ad91a845e159c06fb84af77480907 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sun, 17 Mar 2024 17:42:37 -0400 Subject: [PATCH] Omit non-needs_drop drop_in_place in vtables This replaces the drop_in_place reference with null in vtables. On librustc_driver.so, this drops about ~17k dynamic relocations from the output, since many vtables can now be placed in read-only memory, rather than having a relocated pointer included. This makes a tradeoff by adding a null check at vtable call sites. That's hard to avoid without changing the vtable format (e.g., to use a pc-relative relocation instead of an absolute address, and avoid the dynamic relocation that way). But it seems likely that the check is cheap at runtime. --- .../rustc_codegen_cranelift/src/abi/mod.rs | 16 ++ compiler/rustc_codegen_cranelift/src/base.rs | 5 +- compiler/rustc_codegen_ssa/src/meth.rs | 27 ++- compiler/rustc_codegen_ssa/src/mir/block.rs | 203 ++++++++++-------- compiler/rustc_middle/src/ty/vtable.rs | 12 +- .../partitioning/vtable-through-const.rs | 43 ++-- tests/codegen/vtable-loads.rs | 16 ++ 7 files changed, 206 insertions(+), 116 deletions(-) create mode 100644 tests/codegen/vtable-loads.rs diff --git a/compiler/rustc_codegen_cranelift/src/abi/mod.rs b/compiler/rustc_codegen_cranelift/src/abi/mod.rs index 6f346af25c6dd..ca0db2f2a04a9 100644 --- a/compiler/rustc_codegen_cranelift/src/abi/mod.rs +++ b/compiler/rustc_codegen_cranelift/src/abi/mod.rs @@ -593,6 +593,7 @@ pub(crate) fn codegen_drop<'tcx>( fx: &mut FunctionCx<'_, '_, 'tcx>, source_info: mir::SourceInfo, drop_place: CPlace<'tcx>, + target: BasicBlock, ) { let ty = drop_place.layout().ty; let drop_instance = Instance::resolve_drop_in_place(fx.tcx, ty).polymorphize(fx.tcx); @@ -618,6 +619,12 @@ pub(crate) fn codegen_drop<'tcx>( let ptr = ptr.get_addr(fx); let drop_fn = crate::vtable::drop_fn_of_obj(fx, vtable); + let is_null = fx.bcx.ins().icmp_imm(IntCC::Equal, drop_fn, 0); + let target_block = fx.get_block(target); + let continued = fx.bcx.create_block(); + fx.bcx.ins().brif(is_null, target_block, &[], continued, &[]); + fx.bcx.switch_to_block(continued); + // FIXME(eddyb) perhaps move some of this logic into // `Instance::resolve_drop_in_place`? let virtual_drop = Instance { @@ -657,6 +664,12 @@ pub(crate) fn codegen_drop<'tcx>( let (data, vtable) = drop_place.to_cvalue(fx).dyn_star_force_data_on_stack(fx); let drop_fn = crate::vtable::drop_fn_of_obj(fx, vtable); + let is_null = fx.bcx.ins().icmp_imm(IntCC::Equal, drop_fn, 0); + let target_block = fx.get_block(target); + let continued = fx.bcx.create_block(); + fx.bcx.ins().brif(is_null, target_block, &[], continued, &[]); + fx.bcx.switch_to_block(continued); + let virtual_drop = Instance { def: ty::InstanceDef::Virtual(drop_instance.def_id(), 0), args: drop_instance.args, @@ -695,4 +708,7 @@ pub(crate) fn codegen_drop<'tcx>( } } } + + let target_block = fx.get_block(target); + fx.bcx.ins().jump(target_block, &[]); } diff --git a/compiler/rustc_codegen_cranelift/src/base.rs b/compiler/rustc_codegen_cranelift/src/base.rs index e3d050df4cd1b..9d2a9fe853d5a 100644 --- a/compiler/rustc_codegen_cranelift/src/base.rs +++ b/compiler/rustc_codegen_cranelift/src/base.rs @@ -515,10 +515,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) { } TerminatorKind::Drop { place, target, unwind: _, replace: _ } => { let drop_place = codegen_place(fx, *place); - crate::abi::codegen_drop(fx, source_info, drop_place); - - let target_block = fx.get_block(*target); - fx.bcx.ins().jump(target_block, &[]); + crate::abi::codegen_drop(fx, source_info, drop_place, *target); } }; } diff --git a/compiler/rustc_codegen_ssa/src/meth.rs b/compiler/rustc_codegen_ssa/src/meth.rs index 23036e9bea022..000f984218932 100644 --- a/compiler/rustc_codegen_ssa/src/meth.rs +++ b/compiler/rustc_codegen_ssa/src/meth.rs @@ -14,12 +14,13 @@ impl<'a, 'tcx> VirtualIndex { VirtualIndex(index as u64) } - pub fn get_fn>( + fn get_fn_inner>( self, bx: &mut Bx, llvtable: Bx::Value, ty: Ty<'tcx>, fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + nonnull: bool, ) -> Bx::Value { // Load the function pointer from the object. debug!("get_fn({llvtable:?}, {ty:?}, {self:?})"); @@ -40,13 +41,35 @@ impl<'a, 'tcx> VirtualIndex { } else { let gep = bx.inbounds_ptradd(llvtable, bx.const_usize(vtable_byte_offset)); let ptr = bx.load(llty, gep, ptr_align); - bx.nonnull_metadata(ptr); // VTable loads are invariant. bx.set_invariant_load(ptr); + if nonnull { + bx.nonnull_metadata(ptr); + } ptr } } + pub fn get_optional_fn>( + self, + bx: &mut Bx, + llvtable: Bx::Value, + ty: Ty<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + ) -> Bx::Value { + self.get_fn_inner(bx, llvtable, ty, fn_abi, false) + } + + pub fn get_fn>( + self, + bx: &mut Bx, + llvtable: Bx::Value, + ty: Ty<'tcx>, + fn_abi: &FnAbi<'tcx, Ty<'tcx>>, + ) -> Bx::Value { + self.get_fn_inner(bx, llvtable, ty, fn_abi, true) + } + pub fn get_usize>( self, bx: &mut Bx, diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index 26e429034dca6..67d881e714e9d 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -499,6 +499,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { &mut self, helper: TerminatorCodegenHelper<'tcx>, bx: &mut Bx, + source_info: &mir::SourceInfo, location: mir::Place<'tcx>, target: mir::BasicBlock, unwind: mir::UnwindAction, @@ -522,90 +523,109 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { args1 = [place.val.llval]; &args1[..] }; - let (drop_fn, fn_abi, drop_instance) = - match ty.kind() { - // FIXME(eddyb) perhaps move some of this logic into - // `Instance::resolve_drop_in_place`? - ty::Dynamic(_, _, ty::Dyn) => { - // IN THIS ARM, WE HAVE: - // ty = *mut (dyn Trait) - // which is: exists ( *mut T, Vtable ) - // args[0] args[1] - // - // args = ( Data, Vtable ) - // | - // v - // /-------\ - // | ... | - // \-------/ - // - let virtual_drop = Instance { - def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), // idx 0: the drop function - args: drop_fn.args, - }; - debug!("ty = {:?}", ty); - debug!("drop_fn = {:?}", drop_fn); - debug!("args = {:?}", args); - let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); - let vtable = args[1]; - // Truncate vtable off of args list - args = &args[..1]; - ( - meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) - .get_fn(bx, vtable, ty, fn_abi), - fn_abi, - virtual_drop, - ) - } - ty::Dynamic(_, _, ty::DynStar) => { - // IN THIS ARM, WE HAVE: - // ty = *mut (dyn* Trait) - // which is: *mut exists (T, Vtable) - // - // args = [ * ] - // | - // v - // ( Data, Vtable ) - // | - // v - // /-------\ - // | ... | - // \-------/ - // - // - // WE CAN CONVERT THIS INTO THE ABOVE LOGIC BY DOING - // - // data = &(*args[0]).0 // gives a pointer to Data above (really the same pointer) - // vtable = (*args[0]).1 // loads the vtable out - // (data, vtable) // an equivalent Rust `*mut dyn Trait` - // - // SO THEN WE CAN USE THE ABOVE CODE. - let virtual_drop = Instance { - def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), // idx 0: the drop function - args: drop_fn.args, - }; - debug!("ty = {:?}", ty); - debug!("drop_fn = {:?}", drop_fn); - debug!("args = {:?}", args); - let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); - let meta_ptr = place.project_field(bx, 1); - let meta = bx.load_operand(meta_ptr); - // Truncate vtable off of args list - args = &args[..1]; - debug!("args' = {:?}", args); - ( - meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) - .get_fn(bx, meta.immediate(), ty, fn_abi), - fn_abi, - virtual_drop, - ) - } - _ => ( - bx.get_fn_addr(drop_fn), - bx.fn_abi_of_instance(drop_fn, ty::List::empty()), - drop_fn, - ), - }; + let (maybe_null, drop_fn, fn_abi, drop_instance) = match ty.kind() { + // FIXME(eddyb) perhaps move some of this logic into + // `Instance::resolve_drop_in_place`? + ty::Dynamic(_, _, ty::Dyn) => { + // IN THIS ARM, WE HAVE: + // ty = *mut (dyn Trait) + // which is: exists ( *mut T, Vtable ) + // args[0] args[1] + // + // args = ( Data, Vtable ) + // | + // v + // /-------\ + // | ... | + // \-------/ + // + let virtual_drop = Instance { + def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), // idx 0: the drop function + args: drop_fn.args, + }; + debug!("ty = {:?}", ty); + debug!("drop_fn = {:?}", drop_fn); + debug!("args = {:?}", args); + let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); + let vtable = args[1]; + // Truncate vtable off of args list + args = &args[..1]; + ( + true, + meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) + .get_optional_fn(bx, vtable, ty, fn_abi), + fn_abi, + virtual_drop, + ) + } + ty::Dynamic(_, _, ty::DynStar) => { + // IN THIS ARM, WE HAVE: + // ty = *mut (dyn* Trait) + // which is: *mut exists (T, Vtable) + // + // args = [ * ] + // | + // v + // ( Data, Vtable ) + // | + // v + // /-------\ + // | ... | + // \-------/ + // + // + // WE CAN CONVERT THIS INTO THE ABOVE LOGIC BY DOING + // + // data = &(*args[0]).0 // gives a pointer to Data above (really the same pointer) + // vtable = (*args[0]).1 // loads the vtable out + // (data, vtable) // an equivalent Rust `*mut dyn Trait` + // + // SO THEN WE CAN USE THE ABOVE CODE. + let virtual_drop = Instance { + def: ty::InstanceDef::Virtual(drop_fn.def_id(), 0), // idx 0: the drop function + args: drop_fn.args, + }; + debug!("ty = {:?}", ty); + debug!("drop_fn = {:?}", drop_fn); + debug!("args = {:?}", args); + let fn_abi = bx.fn_abi_of_instance(virtual_drop, ty::List::empty()); + let meta_ptr = place.project_field(bx, 1); + let meta = bx.load_operand(meta_ptr); + // Truncate vtable off of args list + args = &args[..1]; + debug!("args' = {:?}", args); + ( + true, + meth::VirtualIndex::from_index(ty::COMMON_VTABLE_ENTRIES_DROPINPLACE) + .get_optional_fn(bx, meta.immediate(), ty, fn_abi), + fn_abi, + virtual_drop, + ) + } + _ => ( + false, + bx.get_fn_addr(drop_fn), + bx.fn_abi_of_instance(drop_fn, ty::List::empty()), + drop_fn, + ), + }; + + // We generate a null check for the drop_fn. This saves a bunch of relocations being + // generated for no-op drops. + if maybe_null { + let is_not_null = bx.append_sibling_block("is_not_null"); + let llty = bx.fn_ptr_backend_type(fn_abi); + let null = bx.const_null(llty); + let non_null = bx.icmp( + base::bin_op_to_icmp_predicate(mir::BinOp::Ne.to_hir_binop(), false), + drop_fn, + null, + ); + bx.cond_br(non_null, is_not_null, self.llbb(target)); + bx.switch_to_block(is_not_null); + self.set_debug_loc(bx, *source_info); + } + helper.do_call( self, bx, @@ -616,7 +636,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { unwind, &[], Some(drop_instance), - mergeable_succ, + !maybe_null && mergeable_succ, ) } @@ -1345,9 +1365,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { MergingSucc::False } - mir::TerminatorKind::Drop { place, target, unwind, replace: _ } => { - self.codegen_drop_terminator(helper, bx, place, target, unwind, mergeable_succ()) - } + mir::TerminatorKind::Drop { place, target, unwind, replace: _ } => self + .codegen_drop_terminator( + helper, + bx, + &terminator.source_info, + place, + target, + unwind, + mergeable_succ(), + ), mir::TerminatorKind::Assert { ref cond, expected, ref msg, target, unwind } => self .codegen_assert_terminator( diff --git a/compiler/rustc_middle/src/ty/vtable.rs b/compiler/rustc_middle/src/ty/vtable.rs index 2ad194313100a..b8371cc2bca57 100644 --- a/compiler/rustc_middle/src/ty/vtable.rs +++ b/compiler/rustc_middle/src/ty/vtable.rs @@ -84,10 +84,14 @@ pub(super) fn vtable_allocation_provider<'tcx>( let idx: u64 = u64::try_from(idx).unwrap(); let scalar = match entry { VtblEntry::MetadataDropInPlace => { - let instance = ty::Instance::resolve_drop_in_place(tcx, ty); - let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); - let fn_ptr = Pointer::from(fn_alloc_id); - Scalar::from_pointer(fn_ptr, &tcx) + if ty.needs_drop(tcx, ty::ParamEnv::reveal_all()) { + let instance = ty::Instance::resolve_drop_in_place(tcx, ty); + let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance); + let fn_ptr = Pointer::from(fn_alloc_id); + Scalar::from_pointer(fn_ptr, &tcx) + } else { + Scalar::from_maybe_pointer(Pointer::null(), &tcx) + } } VtblEntry::MetadataSize => Scalar::from_uint(size, ptr_size), VtblEntry::MetadataAlign => Scalar::from_uint(align, ptr_size), diff --git a/tests/codegen-units/partitioning/vtable-through-const.rs b/tests/codegen-units/partitioning/vtable-through-const.rs index 111b4fa1b9ac2..e29e0944b7678 100644 --- a/tests/codegen-units/partitioning/vtable-through-const.rs +++ b/tests/codegen-units/partitioning/vtable-through-const.rs @@ -12,19 +12,25 @@ #![feature(start)] mod mod1 { + struct NeedsDrop; + + impl Drop for NeedsDrop { + fn drop(&mut self) {} + } + pub trait Trait1 { fn do_something(&self) {} fn do_something_else(&self) {} } - impl Trait1 for u32 {} + impl Trait1 for NeedsDrop {} pub trait Trait1Gen { fn do_something(&self, x: T) -> T; fn do_something_else(&self, x: T) -> T; } - impl Trait1Gen for u32 { + impl Trait1Gen for NeedsDrop { fn do_something(&self, x: T) -> T { x } fn do_something_else(&self, x: T) -> T { x } } @@ -33,8 +39,8 @@ mod mod1 { fn id(x: T) -> T { x } // These are referenced, so they produce mono-items (see start()) - pub const TRAIT1_REF: &'static Trait1 = &0u32 as &Trait1; - pub const TRAIT1_GEN_REF: &'static Trait1Gen = &0u32 as &Trait1Gen; + pub const TRAIT1_REF: &'static Trait1 = &NeedsDrop as &Trait1; + pub const TRAIT1_GEN_REF: &'static Trait1Gen = &NeedsDrop as &Trait1Gen; pub const ID_CHAR: fn(char) -> char = id::; @@ -44,47 +50,48 @@ mod mod1 { fn do_something_else(&self) {} } - //~ MONO_ITEM fn ::do_something @@ vtable_through_const-mod1.volatile[Internal] - //~ MONO_ITEM fn ::do_something_else @@ vtable_through_const-mod1.volatile[Internal] - impl Trait2 for u32 {} + //~ MONO_ITEM fn ::do_something @@ vtable_through_const-mod1.volatile[Internal] + //~ MONO_ITEM fn ::do_something_else @@ vtable_through_const-mod1.volatile[Internal] + impl Trait2 for NeedsDrop {} pub trait Trait2Gen { fn do_something(&self, x: T) -> T; fn do_something_else(&self, x: T) -> T; } - impl Trait2Gen for u32 { + impl Trait2Gen for NeedsDrop { fn do_something(&self, x: T) -> T { x } fn do_something_else(&self, x: T) -> T { x } } // These are not referenced, so they do not produce mono-items - pub const TRAIT2_REF: &'static Trait2 = &0u32 as &Trait2; - pub const TRAIT2_GEN_REF: &'static Trait2Gen = &0u32 as &Trait2Gen; + pub const TRAIT2_REF: &'static Trait2 = &NeedsDrop as &Trait2; + pub const TRAIT2_GEN_REF: &'static Trait2Gen = &NeedsDrop as &Trait2Gen; pub const ID_I64: fn(i64) -> i64 = id::; } //~ MONO_ITEM fn start #[start] fn start(_: isize, _: *const *const u8) -> isize { - //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(None) @@ vtable_through_const[Internal] + //~ MONO_ITEM fn ::drop @@ vtable_through_const-fallback.cgu[Internal] + //~ MONO_ITEM fn std::ptr::drop_in_place:: - shim(Some(mod1::NeedsDrop)) @@ vtable_through_const-fallback.cgu[External] // Since Trait1::do_something() is instantiated via its default implementation, // it is considered a generic and is instantiated here only because it is // referenced in this module. - //~ MONO_ITEM fn ::do_something_else @@ vtable_through_const-mod1.volatile[External] + //~ MONO_ITEM fn ::do_something_else @@ vtable_through_const-mod1.volatile[External] // Although it is never used, Trait1::do_something_else() has to be - // instantiated locally here too, otherwise the <&u32 as &Trait1> vtable + // instantiated locally here too, otherwise the <&NeedsDrop as &Trait1> vtable // could not be fully constructed. - //~ MONO_ITEM fn ::do_something @@ vtable_through_const-mod1.volatile[External] + //~ MONO_ITEM fn ::do_something @@ vtable_through_const-mod1.volatile[External] mod1::TRAIT1_REF.do_something(); // Same as above - //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[External] - //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[External] - //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[Internal] - //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[Internal] + //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[External] + //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[External] + //~ MONO_ITEM fn >::do_something @@ vtable_through_const-mod1.volatile[Internal] + //~ MONO_ITEM fn >::do_something_else @@ vtable_through_const-mod1.volatile[Internal] mod1::TRAIT1_GEN_REF.do_something(0u8); //~ MONO_ITEM fn mod1::id:: @@ vtable_through_const-mod1.volatile[External] diff --git a/tests/codegen/vtable-loads.rs b/tests/codegen/vtable-loads.rs new file mode 100644 index 0000000000000..1dd6ca51063b1 --- /dev/null +++ b/tests/codegen/vtable-loads.rs @@ -0,0 +1,16 @@ +//@ compile-flags: -O + +#![crate_type = "lib"] + +// CHECK-LABEL: @loop_skips_vtable_load +#[no_mangle] +pub fn loop_skips_vtable_load(x: &dyn Fn()) { + // CHECK: load ptr, ptr %0{{.*}}, !invariant.load + // CHECK-NEXT: tail call void %1 + // CHECK-NOT: load ptr + x(); + for _ in 0..100 { + // CHECK: tail call void %1 + x(); + } +}