Add autocasts for bf16 and bf16xN

sayantn · sayantn · commit 423d8ece691b · 2025-10-08T15:35:24.000+05:30
diff --git a/compiler/rustc_codegen_llvm/src/abi.rs b/compiler/rustc_codegen_llvm/src/abi.rs
@@ -370,6 +370,8 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
         }
 
         match self.type_kind(llvm_ty) {
+            TypeKind::BFloat => rust_ty == self.type_i16(),
+
             // Some LLVM intrinsics return **non-packed** structs, but they can't be mimicked from Rust
             // due to auto field-alignment in non-packed structs (packed structs are represented in LLVM
             // as, well, packed structs, so they won't match with those either)
@@ -387,11 +389,18 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
                     },
                 )
             }
-            TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {
+            TypeKind::Vector => {
                 let element_count = self.vector_length(llvm_ty) as u64;
-                let int_width = element_count.next_power_of_two().max(8);
+                let llvm_element_ty = self.element_type(llvm_ty);
 
-                rust_ty == self.type_ix(int_width)
+                if llvm_element_ty == self.type_bf16() {
+                    rust_ty == self.type_vector(self.type_i16(), element_count)
+                } else if llvm_element_ty == self.type_i1() {
+                    let int_width = element_count.next_power_of_two().max(8);
+                    rust_ty == self.type_ix(int_width)
+                } else {
+                    false
+                }
             }
             _ => false,
         }
diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs
@@ -1761,7 +1761,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
                     self.zext_i1_vector_to_int(val, src_ty, dest_ty)
                 }
             }
-            _ => unreachable!(),
+            _ => self.bitcast(val, dest_ty), // for `bf16(xN)` <-> `u16(xN)`
         }
     }
 
diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs
@@ -963,6 +963,9 @@ unsafe extern "C" {
     pub(crate) fn LLVMDoubleTypeInContext(C: &Context) -> &Type;
     pub(crate) fn LLVMFP128TypeInContext(C: &Context) -> &Type;
 
+    // Operations on non-IEEE real types
+    pub(crate) fn LLVMBFloatTypeInContext(C: &Context) -> &Type;
+
     // Operations on function types
     pub(crate) fn LLVMFunctionType<'a>(
         ReturnType: &'a Type,
diff --git a/compiler/rustc_codegen_llvm/src/type_.rs b/compiler/rustc_codegen_llvm/src/type_.rs
@@ -174,6 +174,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {
             )
         }
     }
+
+    pub(crate) fn type_bf16(&self) -> &'ll Type {
+        unsafe { llvm::LLVMBFloatTypeInContext(self.llcx()) }
+    }
 }
 
 impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
@@ -247,7 +251,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {
 
     fn float_width(&self, ty: &'ll Type) -> usize {
         match self.type_kind(ty) {
-            TypeKind::Half => 16,
+            TypeKind::Half | TypeKind::BFloat => 16,
             TypeKind::Float => 32,
             TypeKind::Double => 64,
             TypeKind::X86_FP80 => 80,
diff --git a/tests/codegen-llvm/inject-autocast.rs b/tests/codegen-llvm/inject-autocast.rs
@@ -4,7 +4,7 @@
 #![feature(link_llvm_intrinsics, abi_unadjusted, repr_simd, simd_ffi, portable_simd, f16)]
 #![crate_type = "lib"]
 
-use std::simd::i64x2;
+use std::simd::{f32x4, i16x8, i64x2};
 
 #[repr(simd)]
 pub struct Tile([i8; 1024]);
@@ -36,6 +36,19 @@ pub unsafe fn struct_with_i1_vector_autocast(a: i64x2, b: i64x2) -> (u8, u8) {
     foo(a, b)
 }
 
+// CHECK-LABEL: @bf16_vector_autocast
+#[no_mangle]
+pub unsafe fn bf16_vector_autocast(a: f32x4) -> i16x8 {
+    extern "unadjusted" {
+        #[link_name = "llvm.x86.vcvtneps2bf16128"]
+        fn foo(a: f32x4) -> i16x8;
+    }
+
+    // CHECK: [[A:%[0-9]+]] = call <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float> {{.*}})
+    // CHECK: bitcast <8 x bfloat> [[A]] to <8 x i16>
+    foo(a)
+}
+
 // CHECK-LABEL: @struct_autocast
 #[no_mangle]
 pub unsafe fn struct_autocast(key_metadata: u32, key: i64x2) -> Bar {
@@ -77,6 +90,8 @@ pub unsafe fn i1_vector_autocast(a: f16x8) -> u8 {
 
 // CHECK: declare { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64>, <2 x i64>)
 
+// CHECK: declare <8 x bfloat> @llvm.x86.vcvtneps2bf16128(<4 x float>)
+
 // CHECK: declare { i32, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.x86.encodekey128(i32, <2 x i64>)
 
 // CHECK: declare <8 x i1> @llvm.x86.avx512fp16.fpclass.ph.128(<8 x half>, i32 immarg)

Original file line number	Diff line number	Diff line change
`@@ -370,6 +370,8 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`370`	`370`	`}`
`371`	`371`
`372`	`372`	`match self.type_kind(llvm_ty) {`
	`373`	`+ TypeKind::BFloat => rust_ty == self.type_i16(),`
	`374`	`+`
`373`	`375`	`// Some LLVM intrinsics return **non-packed** structs, but they can't be mimicked from Rust`
`374`	`376`	`// due to auto field-alignment in non-packed structs (packed structs are represented in LLVM`
`375`	`377`	`// as, well, packed structs, so they won't match with those either)`
`@@ -387,11 +389,18 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`387`	`389`	`},`
`388`	`390`	`)`
`389`	`391`	`}`
`390`		`- TypeKind::Vector if self.element_type(llvm_ty) == self.type_i1() => {`
	`392`	`+ TypeKind::Vector => {`
`391`	`393`	`let element_count = self.vector_length(llvm_ty) as u64;`
`392`		`- let int_width = element_count.next_power_of_two().max(8);`
	`394`	`+ let llvm_element_ty = self.element_type(llvm_ty);`
`393`	`395`
`394`		`- rust_ty == self.type_ix(int_width)`
	`396`	`+ if llvm_element_ty == self.type_bf16() {`
	`397`	`+ rust_ty == self.type_vector(self.type_i16(), element_count)`
	`398`	`+ } else if llvm_element_ty == self.type_i1() {`
	`399`	`+ let int_width = element_count.next_power_of_two().max(8);`
	`400`	`+ rust_ty == self.type_ix(int_width)`
	`401`	`+ } else {`
	`402`	`+ false`
	`403`	`+ }`
`395`	`404`	`}`
`396`	`405`	`_ => false,`
`397`	`406`	`}`
Original file line number	Diff line number	Diff line change
`@@ -1761,7 +1761,7 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {`
`1761`	`1761`	`self.zext_i1_vector_to_int(val, src_ty, dest_ty)`
`1762`	`1762`	`}`
`1763`	`1763`	`}`
`1764`		`- _ => unreachable!(),`
	`1764`	`` + _ => self.bitcast(val, dest_ty), // for `bf16(xN)` <-> `u16(xN)` ``
`1765`	`1765`	`}`
`1766`	`1766`	`}`
`1767`	`1767`
Original file line number	Diff line number	Diff line change
`@@ -174,6 +174,10 @@ impl<'ll, CX: Borrow<SCx<'ll>>> GenericCx<'ll, CX> {`
`174`	`174`	`)`
`175`	`175`	`}`
`176`	`176`	`}`
	`177`	`+`
	`178`	`+ pub(crate) fn type_bf16(&self) -> &'ll Type {`
	`179`	`+ unsafe { llvm::LLVMBFloatTypeInContext(self.llcx()) }`
	`180`	`+ }`
`177`	`181`	`}`
`178`	`182`
`179`	`183`	`impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {`
`@@ -247,7 +251,7 @@ impl<'ll, CX: Borrow<SCx<'ll>>> BaseTypeCodegenMethods for GenericCx<'ll, CX> {`
`247`	`251`
`248`	`252`	`fn float_width(&self, ty: &'ll Type) -> usize {`
`249`	`253`	`match self.type_kind(ty) {`
`250`		`- TypeKind::Half => 16,`
	`254`	`+ TypeKind::Half | TypeKind::BFloat => 16,`
`251`	`255`	`TypeKind::Float => 32,`
`252`	`256`	`TypeKind::Double => 64,`
`253`	`257`	`TypeKind::X86_FP80 => 80,`