diff --git a/ci/docker/wasm32-wasi/Dockerfile b/ci/docker/wasm32-wasi/Dockerfile
index 3e250f8b50..e8aa8e0b46 100644
--- a/ci/docker/wasm32-wasi/Dockerfile
+++ b/ci/docker/wasm32-wasi/Dockerfile
@@ -7,11 +7,11 @@ RUN apt-get update -y && apt-get install -y --no-install-recommends \
   xz-utils \
   clang
 
-RUN curl -L https://github.com/bytecodealliance/wasmtime/releases/download/v0.29.0/wasmtime-v0.29.0-x86_64-linux.tar.xz | tar xJf -
-ENV PATH=$PATH:/wasmtime-v0.29.0-x86_64-linux
+# `ENV` performs no command substitution, so `$(arch)` is only usable inside `RUN`; unpack into a fixed directory instead.
+RUN mkdir /wasmtime && curl -L https://github.com/bytecodealliance/wasmtime/releases/download/dev/wasmtime-dev-$(arch)-linux.tar.xz | tar xJf - -C /wasmtime --strip-components=1
+ENV PATH=$PATH:/wasmtime
 
 ENV CARGO_TARGET_WASM32_WASI_RUNNER="wasmtime \
-  --enable-simd \
-  --enable-threads \
+  --wasm-features=threads,relaxed-simd \
   --mapdir .::/checkout/target/wasm32-wasi/release/deps \
   --"
diff --git a/crates/core_arch/src/wasm32/mod.rs b/crates/core_arch/src/wasm32/mod.rs
index 2fbe80e995..ed5fc01ddf 100644
--- a/crates/core_arch/src/wasm32/mod.rs
+++ b/crates/core_arch/src/wasm32/mod.rs
@@ -9,6 +9,9 @@ pub use self::atomic::*;
 mod simd128;
 pub use self::simd128::*;
 
+mod relaxed_simd;
+pub use self::relaxed_simd::*;
+
 mod memory;
 pub use self::memory::*;
 
diff --git a/crates/core_arch/src/wasm32/relaxed_simd.rs b/crates/core_arch/src/wasm32/relaxed_simd.rs
new file mode 100644
index 0000000000..8fe935d1f7
--- /dev/null
+++ b/crates/core_arch/src/wasm32/relaxed_simd.rs
@@ -0,0 +1,481 @@
+use super::v128;
+use crate::core_arch::simd;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.wasm.relaxed.swizzle"]
+    fn llvm_relaxed_swizzle(a: simd::i8x16, b: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.relaxed.trunc.signed"]
+    fn llvm_relaxed_trunc_signed(a: simd::f32x4) -> simd::i32x4;
+    #[link_name = "llvm.wasm.relaxed.trunc.unsigned"]
+    fn llvm_relaxed_trunc_unsigned(a: simd::f32x4) -> simd::i32x4;
+    #[link_name = "llvm.wasm.relaxed.trunc.signed.zero"]
+    fn llvm_relaxed_trunc_signed_zero(a: simd::f64x2) -> simd::i32x4;
+    #[link_name = "llvm.wasm.relaxed.trunc.unsigned.zero"]
+    fn llvm_relaxed_trunc_unsigned_zero(a: simd::f64x2) -> simd::i32x4;
+
+    #[link_name = "llvm.wasm.fma.v4f32"]
+    fn llvm_f32x4_fma(a: simd::f32x4, b: simd::f32x4, c: simd::f32x4) -> simd::f32x4;
+    #[link_name = "llvm.wasm.fms.v4f32"]
+    fn llvm_f32x4_fms(a: simd::f32x4, b: simd::f32x4, c: simd::f32x4) -> simd::f32x4;
+    #[link_name = "llvm.wasm.fma.v2f64"]
+    fn llvm_f64x2_fma(a: simd::f64x2, b: simd::f64x2, c: simd::f64x2) -> simd::f64x2;
+    #[link_name = "llvm.wasm.fms.v2f64"]
+    fn llvm_f64x2_fms(a: simd::f64x2, b: simd::f64x2, c: simd::f64x2) -> simd::f64x2;
+
+    #[link_name = "llvm.wasm.laneselect.v16i8"]
+    fn llvm_i8x16_laneselect(a: simd::i8x16, b: simd::i8x16, c: simd::i8x16) -> simd::i8x16;
+    #[link_name = "llvm.wasm.laneselect.v8i16"]
+    fn llvm_i16x8_laneselect(a: simd::i16x8, b: simd::i16x8, c: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.laneselect.v4i32"]
+    fn llvm_i32x4_laneselect(a: simd::i32x4, b: simd::i32x4, c: simd::i32x4) -> simd::i32x4;
+    #[link_name = "llvm.wasm.laneselect.v2i64"]
+    fn llvm_i64x2_laneselect(a: simd::i64x2, b: simd::i64x2, c: simd::i64x2) -> simd::i64x2;
+
+    #[link_name = "llvm.wasm.relaxed.min.v4f32"]
+    fn llvm_f32x4_relaxed_min(a: simd::f32x4, b: simd::f32x4) -> simd::f32x4;
+    #[link_name = "llvm.wasm.relaxed.min.v2f64"]
+    fn llvm_f64x2_relaxed_min(a: simd::f64x2, b: simd::f64x2) -> simd::f64x2;
+    #[link_name = "llvm.wasm.relaxed.max.v4f32"]
+    fn llvm_f32x4_relaxed_max(a: simd::f32x4, b: simd::f32x4) -> simd::f32x4;
+    #[link_name = "llvm.wasm.relaxed.max.v2f64"]
+    fn llvm_f64x2_relaxed_max(a: simd::f64x2, b: simd::f64x2) -> simd::f64x2;
+
+    #[link_name = "llvm.wasm.relaxed.q15mulr.signed"]
+    fn llvm_relaxed_q15mulr_signed(a: simd::i16x8, b: simd::i16x8) -> simd::i16x8;
+    #[link_name = "llvm.wasm.dot.i8x16.i7x16.signed"]
+    fn llvm_i16x8_relaxed_dot_i8x16_i7x16_s(a: simd::i8x16, b: simd::i8x16) -> simd::i16x8;
+    #[link_name = "llvm.wasm.dot.i8x16.i7x16.add.signed"]
+    fn llvm_i32x4_relaxed_dot_i8x16_i7x16_add_s(
+        a: simd::i8x16,
+        b: simd::i8x16,
+        c: simd::i32x4,
+    ) -> simd::i32x4;
+}
+
+/// A relaxed version of `i8x16_swizzle(a, s)` which selects lanes from `a`
+/// using indices in `s`.
+///
+/// Indices in the range `[0,15]` will select the `i`-th element of `a`.
+/// If the high bit of any element of `s` is set (meaning 128 or greater) then
+/// the corresponding output lane is guaranteed to be zero. Otherwise if the
+/// element of `s` is within the range `[16,128)` then the output lane is either
+/// 0 or `a[s[i] % 16]` depending on the implementation.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.relaxed_swizzle))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i8x16.relaxed_swizzle"))]
+pub fn i8x16_relaxed_swizzle(a: v128, s: v128) -> v128 {
+    unsafe { llvm_relaxed_swizzle(a.as_i8x16(), s.as_i8x16()).v128() }
+}
+
+/// A relaxed version of `i32x4_trunc_sat_f32x4(a)` converts the `f32` lanes
+/// of `a` to signed 32-bit integers.
+///
+/// Values which don't fit in 32-bit integers or are NaN may have the same
+/// result as `i32x4_trunc_sat_f32x4` or may return `i32::MIN`.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f32x4_s))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_trunc_f32x4_s"))]
+pub fn i32x4_relaxed_trunc_f32x4(a: v128) -> v128 {
+    unsafe { llvm_relaxed_trunc_signed(a.as_f32x4()).v128() }
+}
+
+/// A relaxed version of `u32x4_trunc_sat_f32x4(a)` converts the `f32` lanes
+/// of `a` to unsigned 32-bit integers.
+///
+/// Values which don't fit in 32-bit unsigned integers or are NaN may have the
+/// same result as `u32x4_trunc_sat_f32x4` or may return `u32::MAX`.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f32x4_u))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_trunc_f32x4_u"))]
+pub fn u32x4_relaxed_trunc_f32x4(a: v128) -> v128 {
+    unsafe { llvm_relaxed_trunc_unsigned(a.as_f32x4()).v128() }
+}
+
+/// A relaxed version of `i32x4_trunc_sat_f64x2_zero(a)` converts the `f64`
+/// lanes of `a` to signed 32-bit integers and the upper two lanes are zero.
+///
+/// Values which don't fit in 32-bit integers or are NaN may have the same
+/// result as `i32x4_trunc_sat_f64x2_zero` or may return `i32::MIN`.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f64x2_s_zero))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_trunc_f64x2_s_zero"))]
+pub fn i32x4_relaxed_trunc_f64x2_zero(a: v128) -> v128 {
+    unsafe { llvm_relaxed_trunc_signed_zero(a.as_f64x2()).v128() }
+}
+
+/// A relaxed version of `u32x4_trunc_sat_f64x2_zero(a)` converts the `f64`
+/// lanes of `a` to unsigned 32-bit integers and the upper two lanes are zero.
+///
+/// Values which don't fit in 32-bit unsigned integers or are NaN may have the
+/// same result as `u32x4_trunc_sat_f64x2_zero` or may return `u32::MAX`.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_trunc_f64x2_u_zero))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_trunc_f64x2_u_zero"))]
+pub fn u32x4_relaxed_trunc_f64x2_zero(a: v128) -> v128 {
+    unsafe { llvm_relaxed_trunc_unsigned_zero(a.as_f64x2()).v128() }
+}
+
+/// Computes `a * b + c` with either one rounding or two roundings.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.relaxed_madd))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f32x4.relaxed_madd"))]
+pub fn f32x4_relaxed_madd(a: v128, b: v128, c: v128) -> v128 {
+    unsafe { llvm_f32x4_fma(a.as_f32x4(), b.as_f32x4(), c.as_f32x4()).v128() }
+}
+
+/// Computes `-a * b + c` with either one rounding or two roundings.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.relaxed_nmadd))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f32x4.relaxed_nmadd"))]
+pub fn f32x4_relaxed_nmadd(a: v128, b: v128, c: v128) -> v128 {
+    unsafe { llvm_f32x4_fms(a.as_f32x4(), b.as_f32x4(), c.as_f32x4()).v128() }
+}
+
+/// Computes `a * b + c` with either one rounding or two roundings.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.relaxed_madd))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f64x2.relaxed_madd"))]
+pub fn f64x2_relaxed_madd(a: v128, b: v128, c: v128) -> v128 {
+    unsafe { llvm_f64x2_fma(a.as_f64x2(), b.as_f64x2(), c.as_f64x2()).v128() }
+}
+
+/// Computes `-a * b + c` with either one rounding or two roundings.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.relaxed_nmadd))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f64x2.relaxed_nmadd"))]
+pub fn f64x2_relaxed_nmadd(a: v128, b: v128, c: v128) -> v128 {
+    unsafe { llvm_f64x2_fms(a.as_f64x2(), b.as_f64x2(), c.as_f64x2()).v128() }
+}
+
+/// A relaxed version of `v128_bitselect` where this either behaves the same as
+/// `v128_bitselect` or the high bit of each lane `m` is inspected and the
+/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is
+/// chosen if it's zero.
+///
+/// If the `m` mask's lanes are either all-one or all-zero then this instruction
+/// is the same as `v128_bitselect`.
+#[inline]
+#[cfg_attr(test, assert_instr(i8x16.relaxed_laneselect))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i8x16.relaxed_laneselect"))]
+pub fn i8x16_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 {
+    unsafe { llvm_i8x16_laneselect(a.as_i8x16(), b.as_i8x16(), m.as_i8x16()).v128() }
+}
+
+/// A relaxed version of `v128_bitselect` where this either behaves the same as
+/// `v128_bitselect` or the high bit of each lane `m` is inspected and the
+/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is
+/// chosen if it's zero.
+///
+/// If the `m` mask's lanes are either all-one or all-zero then this instruction
+/// is the same as `v128_bitselect`.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.relaxed_laneselect))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i16x8.relaxed_laneselect"))]
+pub fn i16x8_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 {
+    unsafe { llvm_i16x8_laneselect(a.as_i16x8(), b.as_i16x8(), m.as_i16x8()).v128() }
+}
+
+/// A relaxed version of `v128_bitselect` where this either behaves the same as
+/// `v128_bitselect` or the high bit of each lane `m` is inspected and the
+/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is
+/// chosen if it's zero.
+///
+/// If the `m` mask's lanes are either all-one or all-zero then this instruction
+/// is the same as `v128_bitselect`.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_laneselect))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_laneselect"))]
+pub fn i32x4_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 {
+    unsafe { llvm_i32x4_laneselect(a.as_i32x4(), b.as_i32x4(), m.as_i32x4()).v128() }
+}
+
+/// A relaxed version of `v128_bitselect` where this either behaves the same as
+/// `v128_bitselect` or the high bit of each lane `m` is inspected and the
+/// corresponding lane of `a` is chosen if the bit is 1 or the lane of `b` is
+/// chosen if it's zero.
+///
+/// If the `m` mask's lanes are either all-one or all-zero then this instruction
+/// is the same as `v128_bitselect`.
+#[inline]
+#[cfg_attr(test, assert_instr(i64x2.relaxed_laneselect))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i64x2.relaxed_laneselect"))]
+pub fn i64x2_relaxed_laneselect(a: v128, b: v128, m: v128) -> v128 {
+    unsafe { llvm_i64x2_laneselect(a.as_i64x2(), b.as_i64x2(), m.as_i64x2()).v128() }
+}
+
+/// A relaxed version of `f32x4_min` which is either `f32x4_min` or
+/// `f32x4_pmin`.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.relaxed_min))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f32x4.relaxed_min"))]
+pub fn f32x4_relaxed_min(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f32x4_relaxed_min(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// A relaxed version of `f32x4_max` which is either `f32x4_max` or
+/// `f32x4_pmax`.
+#[inline]
+#[cfg_attr(test, assert_instr(f32x4.relaxed_max))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f32x4.relaxed_max"))]
+pub fn f32x4_relaxed_max(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f32x4_relaxed_max(a.as_f32x4(), b.as_f32x4()).v128() }
+}
+
+/// A relaxed version of `f64x2_min` which is either `f64x2_min` or
+/// `f64x2_pmin`.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.relaxed_min))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f64x2.relaxed_min"))]
+pub fn f64x2_relaxed_min(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f64x2_relaxed_min(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// A relaxed version of `f64x2_max` which is either `f64x2_max` or
+/// `f64x2_pmax`.
+#[inline]
+#[cfg_attr(test, assert_instr(f64x2.relaxed_max))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("f64x2.relaxed_max"))]
+pub fn f64x2_relaxed_max(a: v128, b: v128) -> v128 {
+    unsafe { llvm_f64x2_relaxed_max(a.as_f64x2(), b.as_f64x2()).v128() }
+}
+
+/// A relaxed version of `i16x8_q15mulr_sat` where if both lanes are
+/// `i16::MIN` then the result is either `i16::MIN` or `i16::MAX`.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.relaxed_q15mulr_s))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i16x8.relaxed_q15mulr_s"))]
+pub fn i16x8_relaxed_q15mulr(a: v128, b: v128) -> v128 {
+    unsafe { llvm_relaxed_q15mulr_signed(a.as_i16x8(), b.as_i16x8()).v128() }
+}
+
+/// A relaxed dot-product instruction.
+///
+/// This instruction will perform pairwise products of the 8-bit values in `a`
+/// and `b` and then accumulate adjacent pairs into 16-bit results producing a
+/// final `i16x8` vector. The bytes of `a` are always interpreted as signed and
+/// the bytes in `b` may be interpreted as signed or unsigned. If the top bit in
+/// `b` isn't set then the value is the same regardless of whether it's signed
+/// or unsigned.
+///
+/// The accumulation into 16-bit values may be saturated on some platforms, and
+/// on other platforms it may wrap-around on overflow.
+#[inline]
+#[cfg_attr(test, assert_instr(i16x8.relaxed_dot_i8x16_i7x16_s))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i16x8.relaxed_dot_i8x16_i7x16_s"))]
+pub fn i16x8_relaxed_dot_i8x16_i7x16(a: v128, b: v128) -> v128 {
+    unsafe { llvm_i16x8_relaxed_dot_i8x16_i7x16_s(a.as_i8x16(), b.as_i8x16()).v128() }
+}
+
+/// Similar to [`i16x8_relaxed_dot_i8x16_i7x16`] except that the intermediate
+/// `i16x8` result is fed into `i32x4_extadd_pairwise_i16x8` followed by
+/// `i32x4_add` to add the value `c` to the result.
+#[inline]
+#[cfg_attr(test, assert_instr(i32x4.relaxed_dot_i8x16_i7x16_add_s))]
+#[target_feature(enable = "relaxed-simd")]
+#[doc(alias("i32x4.relaxed_dot_i8x16_i7x16_add_s"))]
+pub fn i32x4_relaxed_dot_i8x16_i7x16_add(a: v128, b: v128, c: v128) -> v128 {
+    unsafe {
+        llvm_i32x4_relaxed_dot_i8x16_i7x16_add_s(a.as_i8x16(), b.as_i8x16(), c.as_i32x4()).v128()
+    }
+}
+
+#[cfg(test)]
+pub mod tests {
+    use super::super::simd128::*;
+    use super::*;
+    use core::ops::{Add, Div, Mul, Neg, Sub};
+    use std;
+    use std::fmt::Debug;
+    use std::mem::transmute;
+    use std::num::Wrapping;
+    use std::prelude::v1::*;
+
+    /// Asserts that the bytes of `a` are identical to the bytes of at least one
+    /// vector in `b`, the set of results the relaxed instruction may produce.
+    fn compare_bytes(a: v128, b: &[v128]) {
+        let a: [u8; 16] = unsafe { transmute(a) };
+        if b.iter().any(|b| {
+            let b: [u8; 16] = unsafe { transmute(*b) };
+            a == b
+        }) {
+            return;
+        }
+        eprintln!("input vector {a:?}");
+        eprintln!("did not match any output:");
+        for b in b {
+            eprintln!("  {b:?}");
+        }
+        panic!("relaxed-simd result did not match any permitted output");
+    }
+
+    /// Asserts that each `f32` lane of `actual` is one of `expected`.
+    ///
+    /// Lanes are compared by bit pattern, except that a NaN lane matches any NaN
+    /// in `expected`: WebAssembly leaves the sign and payload of NaN results
+    /// nondeterministic, so scalar and SIMD NaNs need not be byte-identical.
+    fn compare_f32_lanes(actual: v128, expected: &[f32]) {
+        let lanes: [f32; 4] = unsafe { transmute(actual) };
+        for lane in lanes {
+            let found = expected
+                .iter()
+                .any(|e| lane.to_bits() == e.to_bits() || (lane.is_nan() && e.is_nan()));
+            assert!(found, "lane {lane:?} of {lanes:?} is not one of {expected:?}");
+        }
+    }
+
+    /// Same as `compare_f32_lanes`, but for the two `f64` lanes of `actual`.
+    fn compare_f64_lanes(actual: v128, expected: &[f64]) {
+        let lanes: [f64; 2] = unsafe { transmute(actual) };
+        for lane in lanes {
+            let found = expected
+                .iter()
+                .any(|e| lane.to_bits() == e.to_bits() || (lane.is_nan() && e.is_nan()));
+            assert!(found, "lane {lane:?} of {lanes:?} is not one of {expected:?}");
+        }
+    }
+
+    #[test]
+    fn test_relaxed_swizzle() {
+        compare_bytes(
+            i8x16_relaxed_swizzle(
+                i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+                i8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1),
+            ),
+            &[i8x16(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1)],
+        );
+        compare_bytes(
+            i8x16_relaxed_swizzle(
+                i8x16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15),
+                u8x16(0x80, 0xff, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+            ),
+            &[
+                i8x16(0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+                i8x16(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
+            ],
+        );
+    }
+
+    #[test]
+    fn test_relaxed_trunc() {
+        compare_bytes(
+            i32x4_relaxed_trunc_f32x4(f32x4(1.0, 2.0, -1., -4.)),
+            &[i32x4(1, 2, -1, -4)],
+        );
+        compare_bytes(
+            i32x4_relaxed_trunc_f32x4(f32x4(f32::NEG_INFINITY, f32::NAN, -0.0, f32::INFINITY)),
+            &[
+                i32x4(i32::MIN, 0, 0, i32::MAX),
+                i32x4(i32::MIN, i32::MIN, 0, i32::MIN),
+            ],
+        );
+        compare_bytes(
+            i32x4_relaxed_trunc_f64x2_zero(f64x2(1.0, -3.0)),
+            &[i32x4(1, -3, 0, 0)],
+        );
+        compare_bytes(
+            i32x4_relaxed_trunc_f64x2_zero(f64x2(f64::INFINITY, f64::NAN)),
+            &[i32x4(i32::MAX, 0, 0, 0), i32x4(i32::MIN, i32::MIN, 0, 0)],
+        );
+
+        compare_bytes(
+            u32x4_relaxed_trunc_f32x4(f32x4(1.0, 2.0, 5., 100.)),
+            &[i32x4(1, 2, 5, 100)],
+        );
+        compare_bytes(
+            u32x4_relaxed_trunc_f32x4(f32x4(f32::NEG_INFINITY, f32::NAN, -0.0, f32::INFINITY)),
+            // The `-inf` and NaN lanes may each saturate (to 0) or become `u32::MAX`.
+            &[
+                u32x4(0, 0, 0, u32::MAX),
+                u32x4(0, u32::MAX, 0, u32::MAX),
+                u32x4(u32::MAX, 0, 0, u32::MAX),
+                u32x4(u32::MAX, u32::MAX, 0, u32::MAX),
+            ],
+        );
+        compare_bytes(
+            u32x4_relaxed_trunc_f64x2_zero(f64x2(1.0, 3.0)),
+            &[u32x4(1, 3, 0, 0)],
+        );
+        compare_bytes(
+            u32x4_relaxed_trunc_f64x2_zero(f64x2(f64::INFINITY, f64::NAN)),
+            &[u32x4(u32::MAX, 0, 0, 0), u32x4(u32::MAX, u32::MAX, 0, 0)],
+        );
+    }
+
+    #[test]
+    fn test_madd() {
+        let floats = [
+            f32::NAN,
+            f32::NEG_INFINITY,
+            f32::INFINITY,
+            1.0,
+            2.0,
+            -1.0,
+            0.0,
+            100.3,
+            7.8,
+            9.4,
+        ];
+        for &a in floats.iter() {
+            for &b in floats.iter() {
+                for &c in floats.iter() {
+                    let f1 = a * b + c;
+                    let f2 = a.mul_add(b, c);
+                    compare_f32_lanes(
+                        f32x4_relaxed_madd(f32x4(a, a, a, a), f32x4(b, b, b, b), f32x4(c, c, c, c)),
+                        &[f1, f2],
+                    );
+
+                    let f1 = -a * b + c;
+                    let f2 = (-a).mul_add(b, c);
+                    compare_f32_lanes(
+                        f32x4_relaxed_nmadd(
+                            f32x4(a, a, a, a),
+                            f32x4(b, b, b, b),
+                            f32x4(c, c, c, c),
+                        ),
+                        &[f1, f2],
+                    );
+
+                    let a = f64::from(a);
+                    let b = f64::from(b);
+                    let c = f64::from(c);
+                    let f1 = a * b + c;
+                    let f2 = a.mul_add(b, c);
+                    compare_f64_lanes(
+                        f64x2_relaxed_madd(f64x2(a, a), f64x2(b, b), f64x2(c, c)),
+                        &[f1, f2],
+                    );
+                    let f1 = -a * b + c;
+                    let f2 = (-a).mul_add(b, c);
+                    compare_f64_lanes(
+                        f64x2_relaxed_nmadd(f64x2(a, a), f64x2(b, b), f64x2(c, c)),
+                        &[f1, f2],
+                    );
+                }
+            }
+        }
+    }
+}
diff --git a/crates/core_arch/src/wasm32/simd128.rs b/crates/core_arch/src/wasm32/simd128.rs
index 01d09caa6c..5220fa74fb 100644
--- a/crates/core_arch/src/wasm32/simd128.rs
+++ b/crates/core_arch/src/wasm32/simd128.rs
@@ -46,7 +46,7 @@ macro_rules! conversions {
         impl v128 {
             $(
                 #[inline(always)]
-                fn $name(self) -> $ty {
+                pub(crate) fn $name(self) -> $ty {
                     unsafe { mem::transmute(self) }
                 }
             )*
@@ -55,7 +55,7 @@ macro_rules! conversions {
             impl $ty {
                 #[inline(always)]
                 #[rustc_const_stable(feature = "wasm_simd_const", since = "1.56.0")]
-                const fn v128(self) -> v128 {
+                pub(crate) const fn v128(self) -> v128 {
                     unsafe { mem::transmute(self) }
                 }
             }
diff --git a/crates/stdarch-test/Cargo.toml b/crates/stdarch-test/Cargo.toml
index 23bddeda65..ce5705c6e9 100644
--- a/crates/stdarch-test/Cargo.toml
+++ b/crates/stdarch-test/Cargo.toml
@@ -18,7 +18,7 @@ cfg-if = "1.0"
 # time, and we want to make updates to this explicit rather than automatically
 # picking up updates which might break CI with new instruction names.
 [target.'cfg(target_arch = "wasm32")'.dependencies]
-wasmprinter = "=0.2.24"
+wasmprinter = "=0.2.53"
 
 [features]
 default = []