diff --git a/coresimd/src/x86/i586/sse2.rs b/coresimd/src/x86/i586/sse2.rs index 9fd59405144e5..d7849ea89de26 100644 --- a/coresimd/src/x86/i586/sse2.rs +++ b/coresimd/src/x86/i586/sse2.rs @@ -697,7 +697,7 @@ pub unsafe fn _mm_cvtps_epi32(a: f32x4) -> i32x4 { /// `0`. #[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +#[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] pub unsafe fn _mm_cvtsi32_si128(a: i32) -> i32x4 { i32x4::new(a, 0, 0, 0) } @@ -705,7 +705,7 @@ pub unsafe fn _mm_cvtsi32_si128(a: i32) -> i32x4 { /// Return the lowest element of `a`. #[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +#[cfg_attr(all(test, not(windows)), assert_instr(movd))] // FIXME mov on windows pub unsafe fn _mm_cvtsi128_si32(a: i32x4) -> i32 { a.extract(0) } @@ -826,7 +826,11 @@ pub unsafe fn _mm_setzero_si128() -> __m128i { /// Load 64-bit integer from memory into first element of returned vector. #[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +// FIXME movsd on windows +#[cfg_attr(all(test, not(windows), + not(all(target_os = "linux", target_arch = "x86_64")), + target_arch = "x86_64"), + assert_instr(movq))] pub unsafe fn _mm_loadl_epi64(mem_addr: *const i64x2) -> i64x2 { i64x2::new((*mem_addr).extract(0), 0) } @@ -901,7 +905,11 @@ pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { /// `mem_addr` does not need to be aligned on any particular boundary. #[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +// FIXME mov on windows, movlps on i686 +#[cfg_attr(all(test, not(windows), + not(all(target_os = "linux", target_arch = "x86_64")), + target_arch = "x86_64"), + assert_instr(movq))] pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { ptr::copy_nonoverlapping( &a as *const _ as *const u8, @@ -934,7 +942,9 @@ pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { /// element is zero. 
#[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +// FIXME movd on windows, movd on i686 +#[cfg_attr(all(test, not(windows), target_arch = "x86_64"), + assert_instr(movq))] pub unsafe fn _mm_move_epi64(a: i64x2) -> i64x2 { simd_shuffle2(a, i64x2::splat(0), [0, 2]) } @@ -1752,7 +1762,7 @@ pub unsafe fn _mm_cvtsd_ss(a: f32x4, b: f64x2) -> f32x4 { /// Return the lower double-precision (64-bit) floating-point element of "a". #[inline(always)] #[target_feature = "+sse2"] -// no particular instruction to test +#[cfg_attr(all(test, windows), assert_instr(movsd))] // FIXME movq/movlps/mov on other platforms pub unsafe fn _mm_cvtsd_f64(a: f64x2) -> f64 { a.extract(0) } @@ -1839,6 +1849,7 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 { /// zeros. #[inline(always)] #[target_feature = "+sse2"] +#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected pub unsafe fn _mm_setzero_pd() -> f64x2 { f64x2::splat(0_f64) } @@ -1991,6 +2002,7 @@ pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: f64x2) { /// into both elements of returned vector. #[inline(always)] #[target_feature = "+sse2"] +//#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 { let d = *mem_addr; f64x2::new(d, d) } @@ -2000,6 +2012,7 @@ pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> f64x2 { /// into both elements of returned vector. #[inline(always)] #[target_feature = "+sse2"] +//#[cfg_attr(test, assert_instr(movapd))] FIXME movapd expected pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> f64x2 { let d = *mem_addr; f64x2::new(d, d) diff --git a/coresimd/src/x86/i586/sse3.rs b/coresimd/src/x86/i586/sse3.rs index 106d76e798cb8..c582bdbf50093 100644 --- a/coresimd/src/x86/i586/sse3.rs +++ b/coresimd/src/x86/i586/sse3.rs @@ -83,6 +83,7 @@ pub unsafe fn _mm_movedup_pd(a: f64x2) -> f64x2 { /// into both elements of return vector. 
#[inline(always)] #[target_feature = "+sse3"] +#[cfg_attr(test, assert_instr(movddup))] pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> f64x2 { use x86::i586::sse2::_mm_load1_pd; _mm_load1_pd(mem_addr) diff --git a/coresimd/src/x86/i686/ssse3.rs b/coresimd/src/x86/i686/ssse3.rs index e117ef441cd9b..ac20ac748c156 100644 --- a/coresimd/src/x86/i686/ssse3.rs +++ b/coresimd/src/x86/i686/ssse3.rs @@ -88,9 +88,9 @@ pub unsafe fn _mm_hadds_pi16(a: i16x4, b: i16x4) -> i16x4 { /// packed 64-bit vectors of [4 x i16]. #[inline(always)] #[target_feature = "+ssse3"] -#[cfg_attr(test, assert_instr(phsubsw))] +#[cfg_attr(test, assert_instr(phsubw))] pub unsafe fn _mm_hsub_pi16(a: i16x4, b: i16x4) -> i16x4 { - mem::transmute(phsubsw(mem::transmute(a), mem::transmute(b))) + mem::transmute(phsubw(mem::transmute(a), mem::transmute(b))) } /// Horizontally subtracts the adjacent pairs of values contained in 2