Auto merge of rust-lang#133878 - fmease:rollup-ov1am8o, r=fmease

Rollup of 6 pull requests Successful merges: - rust-lang#127565 (Teach rustc about the Xtensa VaListImpl) - rust-lang#128004 (codegen `#[naked]` functions using global asm) - rust-lang#133256 (CI: use free runners for i686-gnu jobs) - rust-lang#133472 (Run TLS destructors for wasm32-wasip1-threads) - rust-lang#133632 (CI: split x86_64-msvc job) - rust-lang#133827 (CI: rfl: move job forward to Linux v6.13-rc1) r? `@ghost` `@rustbot` modify labels: rollup
rust-lang-ci · Dec 5, 2024 · 13c0180 · 13c0180
2 parents acabb52 + a0f24c7
commit 13c0180
Show file tree

Hide file tree

Showing 31 changed files with 892 additions and 144 deletions.
diff --git a/compiler/rustc_attr/src/builtin.rs b/compiler/rustc_attr/src/builtin.rs
@@ -49,12 +49,21 @@ pub enum InlineAttr {
     Never,
 }
 
-#[derive(Clone, Encodable, Decodable, Debug, PartialEq, Eq, HashStable_Generic)]
+#[derive(Copy, Clone, Encodable, Decodable, Debug, PartialEq, Eq, HashStable_Generic)]
 pub enum InstructionSetAttr {
     ArmA32,
     ArmT32,
 }
 
+impl InstructionSetAttr {
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::ArmA32 => sym::a32.as_str(),
+            Self::ArmT32 => sym::t32.as_str(),
+        }
+    }
+}
+
 #[derive(Clone, Encodable, Decodable, Debug, HashStable_Generic)]
 pub enum OptimizeAttr {
     None,

diff --git a/compiler/rustc_codegen_gcc/src/asm.rs b/compiler/rustc_codegen_gcc/src/asm.rs
@@ -867,6 +867,13 @@ impl<'gcc, 'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         template_str.push_str("\n.popsection");
         self.context.add_top_level_asm(None, &template_str);
     }
+
+    fn mangled_name(&self, instance: Instance<'tcx>) -> String {
+        // TODO(@Amanieu): Additional mangling is needed on
+        // some targets to add a leading underscore (Mach-O)
+        // or byte count suffixes (x86 Windows).
+        self.tcx.symbol_name(instance).name.to_string()
+    }
 }
 
 fn modifier_to_gcc(

diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -442,6 +442,14 @@ impl<'tcx> AsmCodegenMethods<'tcx> for CodegenCx<'_, 'tcx> {
             );
         }
     }
+
+    fn mangled_name(&self, instance: Instance<'tcx>) -> String {
+        let llval = self.get_fn(instance);
+        llvm::build_string(|s| unsafe {
+            llvm::LLVMRustGetMangledName(llval, s);
+        })
+        .expect("symbol is not valid UTF-8")
+    }
 }
 
 pub(crate) fn inline_asm_call<'ll>(

diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs
@@ -395,17 +395,9 @@ pub(crate) fn llfn_attrs_from_instance<'ll, 'tcx>(
         to_add.push(MemoryEffects::None.create_attr(cx.llcx));
     }
     if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
-        to_add.push(AttributeKind::Naked.create_attr(cx.llcx));
-        // HACK(jubilee): "indirect branch tracking" works by attaching prologues to functions.
-        // And it is a module-level attribute, so the alternative is pulling naked functions into
-        // new LLVM modules. Otherwise LLVM's "naked" functions come with endbr prefixes per
-        // https://github.com/rust-lang/rust/issues/98768
-        to_add.push(AttributeKind::NoCfCheck.create_attr(cx.llcx));
-        if llvm_util::get_version() < (19, 0, 0) {
-            // Prior to LLVM 19, branch-target-enforcement was disabled by setting the attribute to
-            // the string "false". Now it is disabled by absence of the attribute.
-            to_add.push(llvm::CreateAttrStringValue(cx.llcx, "branch-target-enforcement", "false"));
-        }
+        // do nothing; a naked function is converted into an extern function
+        // and a global assembly block. LLVM's support for naked functions is
+        // not used.
     } else {
         // Do not set sanitizer attributes for naked functions.
         to_add.extend(sanitize_attrs(cx, codegen_fn_attrs.no_sanitize));

diff --git a/compiler/rustc_codegen_llvm/src/va_arg.rs b/compiler/rustc_codegen_llvm/src/va_arg.rs
@@ -10,15 +10,23 @@ use crate::type_::Type;
 use crate::type_of::LayoutLlvmExt;
 use crate::value::Value;
 
+fn round_up_to_alignment<'ll>(
+    bx: &mut Builder<'_, 'll, '_>,
+    mut value: &'ll Value,
+    align: Align,
+) -> &'ll Value {
+    value = bx.add(value, bx.cx().const_i32(align.bytes() as i32 - 1));
+    return bx.and(value, bx.cx().const_i32(-(align.bytes() as i32)));
+}
+
 fn round_pointer_up_to_alignment<'ll>(
     bx: &mut Builder<'_, 'll, '_>,
     addr: &'ll Value,
     align: Align,
     ptr_ty: &'ll Type,
 ) -> &'ll Value {
     let mut ptr_as_int = bx.ptrtoint(addr, bx.cx().type_isize());
-    ptr_as_int = bx.add(ptr_as_int, bx.cx().const_i32(align.bytes() as i32 - 1));
-    ptr_as_int = bx.and(ptr_as_int, bx.cx().const_i32(-(align.bytes() as i32)));
+    ptr_as_int = round_up_to_alignment(bx, ptr_as_int, align);
     bx.inttoptr(ptr_as_int, ptr_ty)
 }
 
@@ -270,6 +278,106 @@ fn emit_s390x_va_arg<'ll, 'tcx>(
     bx.load(val_type, val_addr, layout.align.abi)
 }
 
+fn emit_xtensa_va_arg<'ll, 'tcx>(
+    bx: &mut Builder<'_, 'll, 'tcx>,
+    list: OperandRef<'tcx, &'ll Value>,
+    target_ty: Ty<'tcx>,
+) -> &'ll Value {
+    // Implementation of va_arg for Xtensa. There doesn't seem to be an authoritative source for
+    // this, other than "what GCC does".
+    //
+    // The va_list type has three fields:
+    // struct __va_list_tag {
+    //   int32_t *va_stk; // Arguments passed on the stack
+    //   int32_t *va_reg; // Arguments passed in registers, saved to memory by the prologue.
+    //   int32_t va_ndx; // Offset into the arguments, in bytes
+    // };
+    //
+    // The first 24 bytes (equivalent to 6 registers) come from va_reg, the rest from va_stk.
+    // Thus if va_ndx is less than 24, the next va_arg *may* read from va_reg,
+    // otherwise it must come from va_stk.
+    //
+    // Primitive arguments are never split between registers and the stack. For example, if loading an 8 byte
+    // primitive value and va_ndx = 20, we instead bump the offset and read everything from va_stk.
+    let va_list_addr = list.immediate();
+    // FIXME: handle multi-field structs that split across regsave/stack?
+    let layout = bx.cx.layout_of(target_ty);
+    let from_stack = bx.append_sibling_block("va_arg.from_stack");
+    let from_regsave = bx.append_sibling_block("va_arg.from_regsave");
+    let end = bx.append_sibling_block("va_arg.end");
+
+    // (*va).va_ndx
+    let va_reg_offset = 4;
+    let va_ndx_offset = va_reg_offset + 4;
+    let offset_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_ndx_offset)]);
+
+    let offset = bx.load(bx.type_i32(), offset_ptr, bx.tcx().data_layout.i32_align.abi);
+    let offset = round_up_to_alignment(bx, offset, layout.align.abi);
+
+    let slot_size = layout.size.align_to(Align::from_bytes(4).unwrap()).bytes() as i32;
+
+    // Update the offset in va_list, by adding the slot's size.
+    let offset_next = bx.add(offset, bx.const_i32(slot_size));
+
+    // Figure out where to look for our value. We do that by checking the end of our slot (offset_next).
+    // If that is within the regsave area, then load from there. Otherwise load from the stack area.
+    let regsave_size = bx.const_i32(24);
+    let use_regsave = bx.icmp(IntPredicate::IntULE, offset_next, regsave_size);
+    bx.cond_br(use_regsave, from_regsave, from_stack);
+
+    bx.switch_to_block(from_regsave);
+    // update va_ndx
+    bx.store(offset_next, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+
+    // (*va).va_reg
+    let regsave_area_ptr =
+        bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(va_reg_offset)]);
+    let regsave_area =
+        bx.load(bx.type_ptr(), regsave_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let regsave_value_ptr = bx.inbounds_gep(bx.type_i8(), regsave_area, &[offset]);
+    bx.br(end);
+
+    bx.switch_to_block(from_stack);
+
+    // The first time we switch from regsave to stack we needs to adjust our offsets a bit.
+    // va_stk is set up such that the first stack argument is always at va_stk + 32.
+    // The corrected offset is written back into the va_list struct.
+
+    // let offset_corrected = cmp::max(offset, 32);
+    let stack_offset_start = bx.const_i32(32);
+    let needs_correction = bx.icmp(IntPredicate::IntULE, offset, stack_offset_start);
+    let offset_corrected = bx.select(needs_correction, stack_offset_start, offset);
+
+    // let offset_next_corrected = offset_corrected + slot_size;
+    // va_ndx = offset_next_corrected;
+    let offset_next_corrected = bx.add(offset_next, bx.const_i32(slot_size));
+    // update va_ndx
+    bx.store(offset_next_corrected, offset_ptr, bx.tcx().data_layout.pointer_align.abi);
+
+    // let stack_value_ptr = unsafe { (*va).va_stk.byte_add(offset_corrected) };
+    let stack_area_ptr = bx.inbounds_gep(bx.type_i8(), va_list_addr, &[bx.cx.const_usize(0)]);
+    let stack_area = bx.load(bx.type_ptr(), stack_area_ptr, bx.tcx().data_layout.pointer_align.abi);
+    let stack_value_ptr = bx.inbounds_gep(bx.type_i8(), stack_area, &[offset_corrected]);
+    bx.br(end);
+
+    bx.switch_to_block(end);
+
+    // On big-endian, for values smaller than the slot size we'd have to align the read to the end
+    // of the slot rather than the start. While the ISA and GCC support big-endian, all the Xtensa
+    // targets supported by rustc are litte-endian so don't worry about it.
+
+    // if from_regsave {
+    //     unsafe { *regsave_value_ptr }
+    // } else {
+    //     unsafe { *stack_value_ptr }
+    // }
+    assert!(bx.tcx().sess.target.endian == Endian::Little);
+    let value_ptr =
+        bx.phi(bx.type_ptr(), &[regsave_value_ptr, stack_value_ptr], &[from_regsave, from_stack]);
+    return bx.load(layout.llvm_type(bx), value_ptr, layout.align.abi);
+}
+
 pub(super) fn emit_va_arg<'ll, 'tcx>(
     bx: &mut Builder<'_, 'll, 'tcx>,
     addr: OperandRef<'tcx, &'ll Value>,
@@ -302,6 +410,7 @@ pub(super) fn emit_va_arg<'ll, 'tcx>(
             let indirect: bool = target_ty_size > 8 || !target_ty_size.is_power_of_two();
             emit_ptr_va_arg(bx, addr, target_ty, indirect, Align::from_bytes(8).unwrap(), false)
         }
+        "xtensa" => emit_xtensa_va_arg(bx, addr, target_ty),
         // For all other architecture/OS combinations fall back to using
         // the LLVM va_arg instruction.
         // https://llvm.org/docs/LangRef.html#va-arg-instruction

diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs
@@ -542,6 +542,13 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
         }
     });
 
+    // naked function MUST NOT be inlined! This attribute is required for the rust compiler itself,
+    // but not for the code generation backend because at that point the naked function will just be
+    // a declaration, with a definition provided in global assembly.
+    if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
+        codegen_fn_attrs.inline = InlineAttr::Never;
+    }
+
     codegen_fn_attrs.optimize = attrs.iter().fold(OptimizeAttr::None, |ia, attr| {
         if !attr.has_name(sym::optimize) {
             return ia;
@@ -626,10 +633,6 @@ fn codegen_fn_attrs(tcx: TyCtxt<'_>, did: LocalDefId) -> CodegenFnAttrs {
         }
     }
 
-    if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::NAKED) {
-        codegen_fn_attrs.inline = InlineAttr::Never;
-    }
-
     // Weak lang items have the same semantics as "std internal" symbols in the
     // sense that they're preserved through all our LTO passes and only
     // strippable by the linker.

diff --git a/compiler/rustc_codegen_ssa/src/mir/mod.rs b/compiler/rustc_codegen_ssa/src/mir/mod.rs
@@ -20,6 +20,7 @@ mod coverageinfo;
 pub mod debuginfo;
 mod intrinsic;
 mod locals;
+mod naked_asm;
 pub mod operand;
 pub mod place;
 mod rvalue;
@@ -176,6 +177,11 @@ pub fn codegen_mir<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     let fn_abi = cx.fn_abi_of_instance(instance, ty::List::empty());
     debug!("fn_abi: {:?}", fn_abi);
 
+    if cx.tcx().codegen_fn_attrs(instance.def_id()).flags.contains(CodegenFnAttrFlags::NAKED) {
+        crate::mir::naked_asm::codegen_naked_asm::<Bx>(cx, &mir, instance);
+        return;
+    }
+
     let debug_context = cx.create_function_debug_context(instance, fn_abi, llfn, mir);
 
     let start_llbb = Bx::append_block(cx, llfn, "start");