From 231dddde3e686c0903a28ce605b8c5d505a2ba21 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Sat, 12 Jul 2025 16:38:46 -0700
Subject: [PATCH 1/3] Let `codegen_transmute_operand` just handle everything

When combined with 143720, this means `rvalue_creates_operand` can just
return `true` for *every* `Rvalue`.  (A future PR could consider removing
it, though just letting it optimize out is fine for now.)

It's nicer anyway, IMHO, because it avoids needing the layout checks to be
consistent in the two places, and thus is an overall reduction in code.
Plus it's a more helpful building block when used in other places this way.
---
 compiler/rustc_codegen_ssa/src/mir/operand.rs |  7 --
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs  | 85 ++++++++++---------
 tests/codegen-llvm/intrinsics/transmute.rs    | 26 +++---
 3 files changed, 59 insertions(+), 59 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/operand.rs b/compiler/rustc_codegen_ssa/src/mir/operand.rs
index 06bedaaa4a27e..998a6fb69412d 100644
--- a/compiler/rustc_codegen_ssa/src/mir/operand.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/operand.rs
@@ -338,13 +338,6 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {

         let val = if field.is_zst() {
             OperandValue::ZeroSized
-        } else if let BackendRepr::SimdVector { .. } = self.layout.backend_repr {
-            // codegen_transmute_operand doesn't support SIMD, but since the previous
-            // check handled ZSTs, the only possible field access into something SIMD
-            // is to the `non_1zst_field` that's the same SIMD. (Other things, even
-            // just padding, would change the wrapper's representation type.)
-            assert_eq!(field.size, self.layout.size);
-            self.val
         } else if field.size == self.layout.size {
             assert_eq!(offset.bytes(), 0);
             fx.codegen_transmute_operand(bx, *self, field)
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 610e2fd231117..25b7447379ad8 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -2,12 +2,12 @@ use rustc_abi::{self as abi, FIRST_VARIANT};
 use rustc_middle::ty::adjustment::PointerCoercion;
 use rustc_middle::ty::layout::{HasTyCtxt, HasTypingEnv, LayoutOf, TyAndLayout};
 use rustc_middle::ty::{self, Instance, Ty, TyCtxt};
-use rustc_middle::{bug, mir};
+use rustc_middle::{bug, mir, span_bug};
 use rustc_session::config::OptLevel;
 use tracing::{debug, instrument};

 use super::operand::{OperandRef, OperandRefBuilder, OperandValue};
-use super::place::{PlaceRef, codegen_tag_value};
+use super::place::{PlaceRef, PlaceValue, codegen_tag_value};
 use super::{FunctionCx, LocalRef};
 use crate::common::{IntPredicate, TypeKind};
 use crate::traits::*;
@@ -229,6 +229,18 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         operand: OperandRef<'tcx, Bx::Value>,
         cast: TyAndLayout<'tcx>,
     ) -> OperandValue {
+        if let abi::BackendRepr::Memory { .. } = cast.backend_repr
+            && !cast.is_zst()
+        {
+            span_bug!(self.mir.span, "Use `codegen_transmute` to transmute to {cast:?}");
+        }
+
+        // `Layout` is interned, so we can do a cheap check for things that are
+        // exactly the same and thus don't need any handling.
+        if abi::Layout::eq(&operand.layout.layout, &cast.layout) {
+            return operand.val;
+        }
+
         // Check for transmutes that are always UB.
         if operand.layout.size != cast.size
             || operand.layout.is_uninhabited()
@@ -241,11 +253,9 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return OperandValue::poison(bx, cast);
         }

+        let cx = bx.cx();
         match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
             _ if cast.is_zst() => OperandValue::ZeroSized,
-            (_, _, abi::BackendRepr::Memory { .. }) => {
-                bug!("Cannot `codegen_transmute_operand` to non-ZST memory-ABI output {cast:?}");
-            }
             (OperandValue::Ref(source_place_val), abi::BackendRepr::Memory { .. }, _) => {
                 assert_eq!(source_place_val.llextra, None);
                 // The existing alignment is part of `source_place_val`,
@@ -256,16 +266,38 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 OperandValue::Immediate(imm),
                 abi::BackendRepr::Scalar(from_scalar),
                 abi::BackendRepr::Scalar(to_scalar),
-            ) => OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar)),
+            ) if from_scalar.size(cx) == to_scalar.size(cx) => {
+                OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
+            }
             (
                 OperandValue::Pair(imm_a, imm_b),
                 abi::BackendRepr::ScalarPair(in_a, in_b),
                 abi::BackendRepr::ScalarPair(out_a, out_b),
-            ) => OperandValue::Pair(
-                transmute_scalar(bx, imm_a, in_a, out_a),
-                transmute_scalar(bx, imm_b, in_b, out_b),
-            ),
-            _ => bug!("Cannot `codegen_transmute_operand` {operand:?} to {cast:?}"),
+            ) if in_a.size(cx) == out_a.size(cx) && in_b.size(cx) == out_b.size(cx) => {
+                OperandValue::Pair(
+                    transmute_scalar(bx, imm_a, in_a, out_a),
+                    transmute_scalar(bx, imm_b, in_b, out_b),
+                )
+            }
+            _ => {
+                // For any other potentially-tricky cases, make a temporary instead.
+                // If anything else wants the target local to be in memory this won't
+                // be hit, as `codegen_transmute` will get called directly. Thus this
+                // is only for places where everything else wants the operand form,
+                // and thus it's not worth making those places get it from memory.
+                //
+                // Notably, Scalar ⇌ ScalarPair cases go here to avoid padding
+                // and endianness issues, as do SimdVector ones to avoid worrying
+                // about things like f32x8 ⇌ ptrx4 that would need multiple steps.
+                let align = Ord::max(operand.layout.align.abi, cast.align.abi);
+                let size = Ord::max(operand.layout.size, cast.size);
+                let temp = PlaceValue::alloca(bx, size, align);
+                bx.lifetime_start(temp.llval, size);
+                operand.val.store(bx, temp.with_type(operand.layout));
+                let val = bx.load_operand(temp.with_type(cast)).val;
+                bx.lifetime_end(temp.llval, size);
+                val
+            }
         }
     }
@@ -967,37 +999,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     /// layout in this code when the right thing will happen anyway.
     pub(crate) fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>) -> bool {
         match *rvalue {
-            mir::Rvalue::Cast(mir::CastKind::Transmute, ref operand, cast_ty) => {
-                let operand_ty = operand.ty(self.mir, self.cx.tcx());
-                let cast_layout = self.cx.layout_of(self.monomorphize(cast_ty));
-                let operand_layout = self.cx.layout_of(self.monomorphize(operand_ty));
-                match (operand_layout.backend_repr, cast_layout.backend_repr) {
-                    // When the output will be in memory anyway, just use its place
-                    // (instead of the operand path) unless it's the trivial ZST case.
-                    (_, abi::BackendRepr::Memory { .. }) => cast_layout.is_zst(),
-
-                    // Otherwise (for a non-memory output) if the input is memory
-                    // then we can just read the value from the place.
-                    (abi::BackendRepr::Memory { .. }, _) => true,
-
-                    // When we have scalar immediates, we can only convert things
-                    // where the sizes match, to avoid endianness questions.
-                    (abi::BackendRepr::Scalar(a), abi::BackendRepr::Scalar(b)) =>
-                        a.size(self.cx) == b.size(self.cx),
-                    (abi::BackendRepr::ScalarPair(a0, a1), abi::BackendRepr::ScalarPair(b0, b1)) =>
-                        a0.size(self.cx) == b0.size(self.cx) && a1.size(self.cx) == b1.size(self.cx),
-
-                    // Mixing Scalars and ScalarPairs can get quite complicated when
-                    // padding and undef get involved, so leave that to the memory path.
-                    (abi::BackendRepr::Scalar(_), abi::BackendRepr::ScalarPair(_, _)) |
-                    (abi::BackendRepr::ScalarPair(_, _), abi::BackendRepr::Scalar(_)) => false,

-                    // SIMD vectors aren't worth the trouble of dealing with complex
-                    // cases like from vectors of f32 to vectors of pointers or
-                    // from fat pointers to vectors of u16. (See #143194 #110021 ...)
-                    (abi::BackendRepr::SimdVector { .. }, _) | (_, abi::BackendRepr::SimdVector { .. }) => false,
-                }
-            }
             mir::Rvalue::Ref(..) |
             mir::Rvalue::CopyForDeref(..) |
             mir::Rvalue::RawPtr(..) |
diff --git a/tests/codegen-llvm/intrinsics/transmute.rs b/tests/codegen-llvm/intrinsics/transmute.rs
index c9a1cd58af338..91cff38773d78 100644
--- a/tests/codegen-llvm/intrinsics/transmute.rs
+++ b/tests/codegen-llvm/intrinsics/transmute.rs
@@ -191,22 +191,28 @@ pub unsafe fn check_byte_from_bool(x: bool) -> u8 {
 // CHECK-LABEL: @check_to_pair(
 #[no_mangle]
 pub unsafe fn check_to_pair(x: u64) -> Option<i32> {
-    // CHECK: %_0 = alloca [8 x i8], align 4
-    // CHECK: store i64 %x, ptr %_0, align 4
+    // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8
+    // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: store i64 %x, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR0:.+]] = load i32, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4
+    // CHECK: %[[PAIR1:.+]] = load i32, ptr %[[PAIR1P]], align 4
+    // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: insertvalue {{.+}}, i32 %[[PAIR0]], 0
+    // CHECK: insertvalue {{.+}}, i32 %[[PAIR1]], 1
     transmute(x)
 }

 // CHECK-LABEL: @check_from_pair(
 #[no_mangle]
 pub unsafe fn check_from_pair(x: Option<i32>) -> u64 {
-    // The two arguments are of types that are only 4-aligned, but they're
-    // immediates so we can write using the destination alloca's alignment.
-    const { assert!(std::mem::align_of::<Option<i32>>() == 4) };
-
-    // CHECK: %_0 = alloca [8 x i8], align 8
-    // CHECK: store i32 %x.0, ptr %_0, align 8
-    // CHECK: store i32 %x.1, ptr %0, align 4
-    // CHECK: %[[R:.+]] = load i64, ptr %_0, align 8
+    // CHECK: %[[TEMP:.+]] = alloca [8 x i8], align 8
+    // CHECK: call void @llvm.lifetime.start.p0(i64 8, ptr %[[TEMP]])
+    // CHECK: store i32 %x.0, ptr %[[TEMP]], align 8
+    // CHECK: %[[PAIR1P:.+]] = getelementptr inbounds i8, ptr %[[TEMP]], i64 4
+    // CHECK: store i32 %x.1, ptr %[[PAIR1P]], align 4
+    // CHECK: %[[R:.+]] = load i64, ptr %[[TEMP]], align 8
+    // CHECK: call void @llvm.lifetime.end.p0(i64 8, ptr %[[TEMP]])
     // CHECK: ret i64 %[[R]]
     transmute(x)
 }

From ea0c7788c049b608f2f497ec3a4b4117c359523c Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Fri, 18 Jul 2025 01:33:32 -0700
Subject: [PATCH 2/3] re-enable direct `bitcast`s for Int/Float vector transmutes (but not ones involving pointers)

---
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs  |  21 +++
 .../codegen-llvm/intrinsics/transmute-simd.rs | 176 ++++++++++++++++++
 2 files changed, 197 insertions(+)
 create mode 100644 tests/codegen-llvm/intrinsics/transmute-simd.rs

diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 25b7447379ad8..4c080de2b31e6 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -253,6 +253,19 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             return OperandValue::poison(bx, cast);
         }

+        // To or from pointers takes different methods, so we use this to restrict
+        // the SimdVector case to types which can be `bitcast` between each other.
+        #[inline]
+        fn vector_can_bitcast(x: abi::Scalar) -> bool {
+            matches!(
+                x,
+                abi::Scalar::Initialized {
+                    value: abi::Primitive::Int(..) | abi::Primitive::Float(..),
+                    ..
+                }
+            )
+        }
+
         let cx = bx.cx();
         match (operand.val, operand.layout.backend_repr, cast.backend_repr) {
             _ if cast.is_zst() => OperandValue::ZeroSized,
@@ -269,6 +282,14 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             ) if from_scalar.size(cx) == to_scalar.size(cx) => {
                 OperandValue::Immediate(transmute_scalar(bx, imm, from_scalar, to_scalar))
             }
+            (
+                OperandValue::Immediate(imm),
+                abi::BackendRepr::SimdVector { element: from_scalar, .. },
+                abi::BackendRepr::SimdVector { element: to_scalar, .. },
+            ) if vector_can_bitcast(from_scalar) && vector_can_bitcast(to_scalar) => {
+                let to_backend_ty = bx.cx().immediate_backend_type(cast);
+                OperandValue::Immediate(bx.bitcast(imm, to_backend_ty))
+            }
             (
                 OperandValue::Pair(imm_a, imm_b),
                 abi::BackendRepr::ScalarPair(in_a, in_b),
diff --git a/tests/codegen-llvm/intrinsics/transmute-simd.rs b/tests/codegen-llvm/intrinsics/transmute-simd.rs
new file mode 100644
index 0000000000000..e34b27e133359
--- /dev/null
+++ b/tests/codegen-llvm/intrinsics/transmute-simd.rs
@@ -0,0 +1,176 @@
+//@ compile-flags: -Copt-level=3 -C no-prepopulate-passes
+//@ only-64bit (so I don't need to worry about usize)
+//@ revisions: aarch64 x86_64
+//@ [aarch64] only-aarch64
+//@ [aarch64] compile-flags: -C target-feature=+neon
+//@ [x86_64] only-x86_64
+//@ [x86_64] compile-flags: -C target-feature=+sse2
+
+#![crate_type = "lib"]
+#![feature(core_intrinsics)]
+#![feature(portable_simd)]
+
+use std::intrinsics::transmute;
+use std::simd::{Simd, f32x4, f64x2, i32x4, i64x2};
+type PtrX2 = Simd<*const (), 2>;
+
+// These tests use the "C" ABI so that the vectors in question aren't passed and
+// returned through memory (as they are in the "Rust" ABI), which greatly
+// simplifies seeing the difference between the in-operand cases vs the ones
+// that fall back to just using the `LocalKind::Memory` path.
+
+// CHECK-LABEL: <2 x i64> @mixed_int(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn mixed_int(v: i32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @mixed_float(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn mixed_float(v: f32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_same_lanes(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_same_lanes(v: f32x4) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_same_lanes(v: i64x2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @float_int_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_int_widen(v: f32x4) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x float> %v to <2 x i64>
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @int_float_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_float_widen(v: i32x4) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <4 x i32> %v to <2 x double>
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x i32> @float_int_narrow(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_int_narrow(v: f64x2) -> i32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x double> %v to <4 x i32>
+    // CHECK: ret <4 x i32> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <4 x float> @int_float_narrow(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_float_narrow(v: i64x2) -> f32x4 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[RET:.+]] = bitcast <2 x i64> %v to <4 x float>
+    // CHECK: ret <4 x float> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_same_lanes(<2 x double> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_same_lanes(v: f64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x double> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x double> @ptr_float_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_float_same_lanes(v: PtrX2) -> f64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x double>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x double> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_same_lanes(<2 x i64> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_same_lanes(v: i64x2) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x i64> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x i64> @ptr_int_same_lanes(<2 x ptr> %v)
+#[no_mangle]
+pub extern "C" fn ptr_int_same_lanes(v: PtrX2) -> i64x2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <2 x ptr> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x i64>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x i64> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @float_ptr_widen(<4 x float> %v)
+#[no_mangle]
+pub extern "C" fn float_ptr_widen(v: f32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x float> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}
+
+// CHECK-LABEL: <2 x ptr> @int_ptr_widen(<4 x i32> %v)
+#[no_mangle]
+pub extern "C" fn int_ptr_widen(v: i32x4) -> PtrX2 {
+    // CHECK-NOT: alloca
+    // CHECK: %[[TEMP:.+]] = alloca [16 x i8]
+    // CHECK-NOT: alloca
+    // CHECK: call void @llvm.lifetime.start.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: store <4 x i32> %v, ptr %[[TEMP]]
+    // CHECK: %[[RET:.+]] = load <2 x ptr>, ptr %[[TEMP]]
+    // CHECK: call void @llvm.lifetime.end.p0(i64 16, ptr %[[TEMP]])
+    // CHECK: ret <2 x ptr> %[[RET]]
+    unsafe { transmute(v) }
+}

From b7e025cfb64094d8672c752a9fffb12e9bc79567 Mon Sep 17 00:00:00 2001
From: Scott McMurray
Date: Wed, 23 Jul 2025 08:40:27 -0700
Subject: [PATCH 3/3] Remove `rvalue_creates_operand` entirely

Split into a separate commit so it can be reverted later if necessary,
should we get new `Rvalue`s where we can't handle it this way.
---
 compiler/rustc_codegen_ssa/src/mir/analyze.rs |  5 ---
 compiler/rustc_codegen_ssa/src/mir/rvalue.rs  | 44 +------------------
 2 files changed, 2 insertions(+), 47 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/analyze.rs b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
index 6d6465dd798b5..b9a6d3af44510 100644
--- a/compiler/rustc_codegen_ssa/src/mir/analyze.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/analyze.rs
@@ -170,11 +170,6 @@ impl<'a, 'b, 'tcx, Bx: BuilderMethods<'b, 'tcx>> Visitor<'tcx> for LocalAnalyzer
         if let Some(local) = place.as_local() {
             self.define(local, DefLocation::Assignment(location));
-            if self.locals[local] != LocalKind::Memory {
-                if !self.fx.rvalue_creates_operand(rvalue) {
-                    self.locals[local] = LocalKind::Memory;
-                }
-            }
         } else {
             self.visit_place(place, PlaceContext::MutatingUse(MutatingUseContext::Store), location);
         }
diff --git a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
index 4c080de2b31e6..e9d37517c5ffd 100644
--- a/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/rvalue.rs
@@ -180,7 +180,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
             }

             _ => {
-                assert!(self.rvalue_creates_operand(rvalue));
                 let temp = self.codegen_rvalue_operand(bx, rvalue);
                 temp.val.store(bx, dest);
             }
@@ -218,11 +217,8 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
     /// Transmutes an `OperandValue` to another `OperandValue`.
     ///
-    /// This is supported only for cases where [`Self::rvalue_creates_operand`]
-    /// returns `true`, and will ICE otherwise. (In particular, anything that
-    /// would need to `alloca` in order to return a `PlaceValue` will ICE,
-    /// expecting those to go via [`Self::codegen_transmute`] instead where
-    /// the destination place is already allocated.)
+    /// This is supported for all cases where the `cast` type is SSA,
+    /// but for non-ZSTs with [`abi::BackendRepr::Memory`] it ICEs.
     pub(crate) fn codegen_transmute_operand(
         &mut self,
         bx: &mut Bx,
@@ -379,8 +375,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         bx: &mut Bx,
         rvalue: &mir::Rvalue<'tcx>,
     ) -> OperandRef<'tcx, Bx::Value> {
-        assert!(self.rvalue_creates_operand(rvalue), "cannot codegen {rvalue:?} to operand",);
-
         match *rvalue {
             mir::Rvalue::Cast(ref kind, ref source, mir_cast_ty) => {
                 let operand = self.codegen_operand(bx, source);
@@ -706,8 +700,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
                 let ty = self.monomorphize(ty);
                 let layout = self.cx.layout_of(ty);

-                // `rvalue_creates_operand` has arranged that we only get here if
-                // we can build the aggregate immediate from the field immediates.
                 let mut builder = OperandRefBuilder::new(layout);
                 for (field_idx, field) in fields.iter_enumerated() {
                     let op = self.codegen_operand(bx, field);
@@ -1008,38 +1000,6 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         OperandValue::Pair(val, of)
     }
-
-    /// Returns `true` if the `rvalue` can be computed into an [`OperandRef`],
-    /// rather than needing a full `PlaceRef` for the assignment destination.
-    ///
-    /// This is used by the [`super::analyze`] code to decide which MIR locals
-    /// can stay as SSA values (as opposed to generating `alloca` slots for them).
-    /// As such, some paths here return `true` even where the specific rvalue
-    /// will not actually take the operand path because the result type is such
-    /// that it always gets an `alloca`, but where it's not worth re-checking the
-    /// layout in this code when the right thing will happen anyway.
-    pub(crate) fn rvalue_creates_operand(&self, rvalue: &mir::Rvalue<'tcx>) -> bool {
-        match *rvalue {
-            mir::Rvalue::Ref(..) |
-            mir::Rvalue::CopyForDeref(..) |
-            mir::Rvalue::RawPtr(..) |
-            mir::Rvalue::Len(..) |
-            mir::Rvalue::Cast(..) | // (*)
-            mir::Rvalue::ShallowInitBox(..) | // (*)
-            mir::Rvalue::BinaryOp(..) |
-            mir::Rvalue::UnaryOp(..) |
-            mir::Rvalue::Discriminant(..) |
-            mir::Rvalue::NullaryOp(..) |
-            mir::Rvalue::ThreadLocalRef(_) |
-            mir::Rvalue::Use(..) |
-            mir::Rvalue::Repeat(..) | // (*)
-            mir::Rvalue::Aggregate(..) | // (*)
-            mir::Rvalue::WrapUnsafeBinder(..) => // (*)
-                true,
-        }
-
-        // (*) this is only true if the type is suitable
-    }
 }

 /// Transmutes a single scalar value `imm` from `from_scalar` to `to_scalar`.
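
Illustration (not part of the patch series above): the fallback `_ => { ... }` arm that PATCH 1 adds to `codegen_transmute_operand` amounts to "spill the value to a stack temporary sized and aligned for both layouts, then reload it at the destination type", which is exactly the alloca/lifetime/store/load shape the updated `check_to_pair`/`check_from_pair` CHECK lines look for. The following is a rough user-level sketch of that behaviour, not compiler code; `Slot` and `scalar_to_pair` are hypothetical names, and it assumes a typical 64-bit little-endian target where `u64` is 8-byte aligned and `(u32, u32)` is 4-byte aligned.

use std::mem::MaybeUninit;

// Hypothetical stand-in for the `PlaceValue::alloca` temporary: sized and
// aligned for the larger of the two layouts (both types are 8 bytes here,
// and u64 needs the stricter 8-byte alignment).
#[repr(C, align(8))]
struct Slot([MaybeUninit<u8>; 8]);

// A Scalar -> ScalarPair "transmute" modelled as store-then-load through the
// slot, mirroring the store/load pair the new CHECK lines expect.
fn scalar_to_pair(x: u64) -> (u32, u32) {
    let mut tmp = Slot([MaybeUninit::uninit(); 8]); // "alloca" + lifetime.start
    unsafe {
        (tmp.0.as_mut_ptr() as *mut u64).write(x); // store the source operand
        (tmp.0.as_ptr() as *const (u32, u32)).read() // load back at the cast type
    }
} // conceptually lifetime.end when `tmp` goes out of scope

fn main() {
    let (a, b) = scalar_to_pair(0x1122_3344_5566_7788);
    // Assuming a little-endian target, the low half of the u64 comes first.
    assert_eq!((a, b), (0x5566_7788, 0x1122_3344));
}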