diff --git a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs index f787b8a6fd94a..e0ebe30752afa 100644 --- a/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs +++ b/compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs @@ -415,7 +415,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>( }); } - sym::simd_fma => { + // FIXME: simd_relaxed_fma doesn't relax to non-fused multiply-add + sym::simd_fma | sym::simd_relaxed_fma => { intrinsic_args!(fx, args => (a, b, c); intrinsic); if !a.layout().ty.is_simd() { diff --git a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs index 604678a9af4c7..79d1a06dd467f 100644 --- a/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs +++ b/compiler/rustc_codegen_gcc/src/intrinsic/simd.rs @@ -772,6 +772,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( sym::simd_flog => "log", sym::simd_floor => "floor", sym::simd_fma => "fma", + sym::simd_relaxed_fma => "fma", // FIXME: this should relax to non-fused multiply-add when necessary sym::simd_fpowi => "__builtin_powi", sym::simd_fpow => "pow", sym::simd_fsin => "sin", diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index da7f94e8cf71d..d8b055137b359 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -1534,6 +1534,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( sym::simd_flog => ("log", bx.type_func(&[vec_ty], vec_ty)), sym::simd_floor => ("floor", bx.type_func(&[vec_ty], vec_ty)), sym::simd_fma => ("fma", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)), + sym::simd_relaxed_fma => ("fmuladd", bx.type_func(&[vec_ty, vec_ty, vec_ty], vec_ty)), sym::simd_fpowi => ("powi", bx.type_func(&[vec_ty, bx.type_i32()], vec_ty)), sym::simd_fpow => ("pow", bx.type_func(&[vec_ty, vec_ty], vec_ty)), sym::simd_fsin => ("sin", bx.type_func(&[vec_ty], vec_ty)), @@ -1572,6 +1573,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>( | sym::simd_fpowi | sym::simd_fsin | sym::simd_fsqrt + | sym::simd_relaxed_fma | sym::simd_round | sym::simd_trunc ) { diff --git a/compiler/rustc_hir_analysis/src/check/intrinsic.rs b/compiler/rustc_hir_analysis/src/check/intrinsic.rs index 3e33120901f69..7434bbf180b97 100644 --- a/compiler/rustc_hir_analysis/src/check/intrinsic.rs +++ b/compiler/rustc_hir_analysis/src/check/intrinsic.rs @@ -641,7 +641,9 @@ pub fn check_intrinsic_type( | sym::simd_round | sym::simd_trunc => (1, 0, vec![param(0)], param(0)), sym::simd_fpowi => (1, 0, vec![param(0), tcx.types.i32], param(0)), - sym::simd_fma => (1, 0, vec![param(0), param(0), param(0)], param(0)), + sym::simd_fma | sym::simd_relaxed_fma => { + (1, 0, vec![param(0), param(0), param(0)], param(0)) + } sym::simd_gather => (3, 0, vec![param(0), param(1), param(2)], param(0)), sym::simd_masked_load => (3, 0, vec![param(0), param(1), param(2)], param(2)), sym::simd_masked_store => (3, 0, vec![param(0), param(1), param(2)], tcx.types.unit), diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index e94c0a5ea6e7f..5fa8ce835edad 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1844,6 +1844,7 @@ symbols! { simd_reduce_mul_unordered, simd_reduce_or, simd_reduce_xor, + simd_relaxed_fma, simd_rem, simd_round, simd_saturating_add, diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs index 5ddca9c4dce88..0d24b0558c594 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd.rs @@ -612,6 +612,20 @@ extern "rust-intrinsic" { #[rustc_nounwind] pub fn simd_fma(x: T, y: T, z: T) -> T; + /// Computes `(x*y) + z` for each element, non-deterministically executing either + /// a fused multiply-add or two operations with rounding of the intermediate result. + /// + /// The operation is fused if the code generator determines that target instruction + /// set has support for a fused operation, and that the fused operation is more efficient + /// than the equivalent, separate pair of mul and add instructions. It is unspecified + /// whether or not a fused operation is selected, and that may depend on optimization + /// level and context, for example. + /// + /// `T` must be a vector of floats. + #[cfg(not(bootstrap))] + #[rustc_nounwind] + pub fn simd_relaxed_fma(x: T, y: T, z: T) -> T; + // Computes the sine of each element. /// /// `T` must be a vector of floats. diff --git a/tests/ui/simd/intrinsic/float-math-pass.rs b/tests/ui/simd/intrinsic/float-math-pass.rs index 9b14d410acbe3..24b9941133ee7 100644 --- a/tests/ui/simd/intrinsic/float-math-pass.rs +++ b/tests/ui/simd/intrinsic/float-math-pass.rs @@ -23,6 +23,7 @@ extern "rust-intrinsic" { fn simd_fexp(x: T) -> T; fn simd_fexp2(x: T) -> T; fn simd_fma(x: T, y: T, z: T) -> T; + fn simd_relaxed_fma(x: T, y: T, z: T) -> T; fn simd_flog(x: T) -> T; fn simd_flog10(x: T) -> T; fn simd_flog2(x: T) -> T; @@ -77,6 +78,9 @@ fn main() { let r = simd_fma(x, h, h); assert_approx_eq!(x, r); + let r = simd_relaxed_fma(x, h, h); + assert_approx_eq!(x, r); + let r = simd_fsqrt(x); assert_approx_eq!(x, r);