diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index db17be9f21..aa5a5d8c18 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -52,9 +52,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vandpd' instruction. // See https://github.com/rust-lang/stdarch/issues/71 -#[cfg_attr(test, assert_instr(vandps))] +#[cfg_attr(test, assert_instr(vandp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -82,9 +81,8 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: should be `vorpd` instruction. // See . -#[cfg_attr(test, assert_instr(vorps))] +#[cfg_attr(test, assert_instr(vorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -162,8 +160,7 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: should be `vandnpd` instruction. -#[cfg_attr(test, assert_instr(vandnps))] +#[cfg_attr(test, assert_instr(vandnp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -615,8 +612,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME Should be 'vxorpd' instruction. -#[cfg_attr(test, assert_instr(vxorps))] +#[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -995,6 +991,29 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i { transmute(dst) } +/// Extracts a 32-bit integer from `a`, selected with `INDEX`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { + static_assert_uimm_bits!(INDEX, 3); + simd_extract!(a.as_i32x8(), INDEX as u32) +} + +/// Returns the first element of the input vector of `[8 x i32]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) +#[inline] +#[target_feature(enable = "avx")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { + simd_extract!(a.as_i32x8(), 0) +} + /// Zeroes the contents of all XMM or YMM registers.
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall) @@ -1378,7 +1397,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m25 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[cfg_attr(test, assert_instr(vmovap))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { @@ -1393,7 +1412,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[cfg_attr(test, assert_instr(vmovap))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) { @@ -1437,7 +1456,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[cfg_attr(test, assert_instr(vmovup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { let mut dst = _mm256_undefined_pd(); @@ -1456,7 +1475,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[cfg_attr(test, assert_instr(vmovup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { mem_addr.cast::<__m256d>().write_unaligned(a); @@ -2145,7 +2164,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected +#[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) @@ -2676,8 +2695,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { - // FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) + simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Casts vector of type __m128d to type __m256d; @@ -2690,8 +2708,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // instructions, thus it has zero latency. 
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { - // FIXME simd_shuffle!(a, a, [0, 1, -1, -1]) - simd_shuffle!(a, a, [0, 1, 0, 0]) + simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) } /// Casts vector of type __m128i to type __m256i; @@ -2705,8 +2722,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { let a = a.as_i64x2(); - // FIXME simd_shuffle!(a, a, [0, 1, -1, -1]) - let dst: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 0]); + let undefined = _mm_undefined_si128().as_i64x2(); + let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]); transmute(dst) } @@ -3719,6 +3736,22 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi32() { + let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); + let r1 = _mm256_extract_epi32::<0>(a); + let r2 = _mm256_extract_epi32::<3>(a); + assert_eq!(r1, -1); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtsi256_si32() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtsi256_si32(a); + assert_eq!(r, 1); + } + #[simd_test(enable = "avx")] #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri unsafe fn test_mm256_zeroall() { @@ -4698,6 +4731,27 @@ mod tests { assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps128_ps256() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_castps128_ps256(a); + assert_eq_m128(_mm256_castps256_ps128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd128_pd256() { + let a = _mm_setr_pd(1., 2.); + let r = _mm256_castpd128_pd256(a); + assert_eq_m128d(_mm256_castpd256_pd128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi128_si256() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm256_castsi128_si256(a); + assert_eq_m128i(_mm256_castsi256_si128(r), a); + } + #[simd_test(enable = "avx")] unsafe fn test_mm256_zextps128_ps256() { let a = _mm_setr_ps(1., 2., 3., 4.); diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index f3dd0c8e4a..faaee030e0 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -3610,29 +3610,6 @@ pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 } -/// Extracts a 32-bit integer from `a`, selected with `INDEX`. -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) -#[inline] -#[target_feature(enable = "avx2")] -// This intrinsic has no corresponding instruction. -#[rustc_legacy_const_generics(1)] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { - static_assert_uimm_bits!(INDEX, 3); - simd_extract!(a.as_i32x8(), INDEX as u32) -} - -/// Returns the first element of the input vector of `[8 x i32]`.
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) -#[inline] -#[target_feature(enable = "avx2")] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { - simd_extract!(a.as_i32x8(), 0) -} - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx2.phadd.w"] @@ -5749,20 +5726,4 @@ mod tests { assert_eq!(r1, 0xFFFF); assert_eq!(r2, 3); } - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_extract_epi32() { - let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); - let r1 = _mm256_extract_epi32::<0>(a); - let r2 = _mm256_extract_epi32::<3>(a); - assert_eq!(r1, -1); - assert_eq!(r2, 3); - } - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_cvtsi256_si32() { - let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); - let r = _mm256_cvtsi256_si32(a); - assert_eq!(r, 1); - } } diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs index 08018bef86..ac88f0eca7 100644 --- a/crates/core_arch/src/x86/avx512dq.rs +++ b/crates/core_arch/src/x86/avx512dq.rs @@ -68,7 +68,7 @@ pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=293) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -267,7 +267,7 @@ pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) @@ -465,7 +465,7 @@ pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -662,7 +662,7 @@ pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. 
+#[cfg_attr(test, assert_instr(vxorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -5211,7 +5211,7 @@ pub unsafe fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_sd&ig_expand=5216) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5236,7 +5236,7 @@ pub unsafe fn _mm_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5270,7 +5270,7 @@ pub unsafe fn _mm_mask_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5295,7 +5295,7 @@ pub unsafe fn _mm_maskz_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5327,7 +5327,7 @@ pub unsafe fn _mm_mask_range_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5347,7 +5347,7 @@ pub unsafe fn _mm_maskz_range_sd(k: __mmask8, a: __m128d, b: __ /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ss&ig_expand=5219) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5369,7 +5369,7 @@ pub unsafe fn _mm_range_round_ss(a: __m128, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5403,7 
+5403,7 @@ pub unsafe fn _mm_mask_range_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5428,7 +5428,7 @@ pub unsafe fn _mm_maskz_range_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5460,7 +5460,7 @@ pub unsafe fn _mm_mask_range_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6070,7 +6070,7 @@ pub unsafe fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) - /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6099,7 +6099,7 @@ pub unsafe fn _mm_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6137,7 +6137,7 @@ pub unsafe fn _mm_mask_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6165,7 +6165,7 @@ pub unsafe fn _mm_maskz_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6188,7 +6188,7 @@ pub unsafe fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] 
+#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6223,7 +6223,7 @@ pub unsafe fn _mm_mask_reduce_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6248,7 +6248,7 @@ pub unsafe fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: _ /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6274,7 +6274,7 @@ pub unsafe fn _mm_reduce_round_ss(a: __m128, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6312,7 +6312,7 @@ pub unsafe fn _mm_mask_reduce_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6340,7 +6340,7 @@ pub unsafe fn _mm_maskz_reduce_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6363,7 +6363,7 @@ pub unsafe fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6398,7 +6398,7 @@ pub unsafe fn _mm_mask_reduce_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6745,7 +6745,7 @@ pub unsafe fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m /// 
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fpclass_sd_mask&ig_expand=3511) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6770,7 +6770,7 @@ pub unsafe fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6794,7 +6794,7 @@ pub unsafe fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fpclass_ss_mask&ig_expand=3515) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6819,7 +6819,7 @@ pub unsafe fn _mm_fpclass_ss_mask(a: __m128) -> __mmask8 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -10052,7 +10052,7 @@ mod tests { assert_eq_m512(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10061,7 +10061,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10071,7 +10071,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10080,7 +10080,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10090,7 +10090,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10099,7 +10099,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10108,7 +10108,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ 
-10118,7 +10118,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10127,7 +10127,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10137,7 +10137,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10391,7 +10391,7 @@ mod tests { assert_eq_m512(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10400,7 +10400,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10412,7 +10412,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10422,7 +10422,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10431,7 +10431,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10441,7 +10441,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10450,7 +10450,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10459,7 +10459,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10471,7 +10471,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10481,7 +10481,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10490,7 +10490,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10500,7 +10500,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10675,7 
+10675,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_fpclass_sd_mask() { let a = _mm_set_pd(1., f64::INFINITY); let r = _mm_fpclass_sd_mask::<0x18>(a); @@ -10683,7 +10683,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_fpclass_sd_mask() { let a = _mm_set_sd(f64::INFINITY); let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); @@ -10691,7 +10691,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_fpclass_ss_mask() { let a = _mm_set_ss(f32::INFINITY); let r = _mm_fpclass_ss_mask::<0x18>(a); @@ -10699,7 +10699,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_fpclass_ss_mask() { let a = _mm_set_ss(f32::INFINITY); let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 286e1e1517..309a315039 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -25301,7 +25301,7 @@ pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1 pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { simd_shuffle!( a, - _mm_set1_ps(-1.), + _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], ) } @@ -25315,7 +25315,7 @@ pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { simd_shuffle!( a, - _mm256_set1_ps(-1.), + _mm256_undefined_ps(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], ) } @@ -25375,7 +25375,7 @@ pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { - transmute(a.as_m512()) + transmute(a) } /// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25385,7 +25385,7 @@ pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { - transmute(a.as_m512()) + transmute(a) } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25395,7 +25395,7 @@ pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle!(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
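[review note, not part of the diff] The cast hunks in this region all replace the second `simd_shuffle!` operand (`_mm_set1_pd(-1.)` and friends) with an `_mm*_undefined_*` vector. Shuffle indices count across the concatenation of both operands, so for a 2-lane `a` the indices 0 and 1 select lanes of `a`, and index 2 selects lane 0 of the second operand. A minimal sketch of the pattern, assuming the crate-internal `simd_shuffle!` macro is in scope as it is in avx512f.rs:

    #[target_feature(enable = "avx512f")]
    unsafe fn widen_pd_sketch(a: __m128d) -> __m512d {
        // Lanes 0-1 come from `a`; every index >= 2 reads the undefined
        // operand, so the upper 384 bits stay unspecified and no -1.0
        // constant has to be materialized.
        simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2])
    }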
@@ -25405,7 +25405,7 @@ pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle!(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25455,7 +25455,7 @@ pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { - transmute(a.as_m512d()) + transmute(a) } /// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25465,7 +25465,7 @@ pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { - transmute(a.as_m512d()) + transmute(a) } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25475,7 +25475,7 @@ pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { - simd_shuffle!(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25485,7 +25485,7 @@ pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { - simd_shuffle!(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
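[review note, not part of the diff] The next hunk truncates `vbroadcas` to `vbroadcast` for the same reason the earlier hunks use `vandp`, `vorp`, `vxorp`, and `vmovap`: `assert_instr` appears to match the expected mnemonic as a prefix of the disassembled instruction, so one truncated name accepts whichever concrete encoding LLVM picks. A hedged sketch of the idea (demo function, not in the diff):

    // `vandp` matches both `vandps` and `vandpd`, so the assertion no longer
    // fails when LLVM lowers the pd intrinsic to the equivalent ps form.
    #[cfg_attr(test, assert_instr(vandp))]
    #[target_feature(enable = "avx")]
    unsafe fn and_pd_sketch(a: __m256d, b: __m256d) -> __m256d {
        _mm256_and_pd(a, b)
    }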
@@ -25654,7 +25654,7 @@ pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq +#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } @@ -39930,12 +39930,12 @@ pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si(a, SAE) + vcvttss2si(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39945,12 +39945,12 @@ pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si(a, SAE) + vcvttss2si(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -39960,12 +39960,12 @@ pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2usi(a, SAE) + vcvttss2usi(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. @@ -39974,9 +39974,9 @@ pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si))] +#[cfg_attr(test, assert_instr(vcvttss2si))] pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { - vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
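[review note, not part of the diff] The `vcvtss2si` to `vcvttss2si` changes are not only about the test assertions: the extra `t` selects the truncating LLVM intrinsic, so the old bindings rounded according to the current MXCSR mode instead of truncating. A small sketch of the observable difference (demo function, not in the diff):

    #[target_feature(enable = "avx512f")]
    unsafe fn truncation_sketch() {
        let x = _mm_set_ss(2.7);
        // Under the default round-to-nearest mode the rounding conversion
        // yields 3, while the truncating one always chops toward zero.
        assert_eq!(_mm_cvtss_si32(x), 3);
        assert_eq!(_mm_cvttss_i32(x), 2);
    }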
@@ -39985,9 +39985,9 @@ pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi))] +#[cfg_attr(test, assert_instr(vcvttss2usi))] pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { - vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39997,12 +39997,12 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si(a, SAE) + vcvttsd2si(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -40012,12 +40012,12 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si(a, SAE) + vcvttsd2si(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -40027,12 +40027,12 @@ pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2usi(a, SAE) + vcvttsd2usi(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. @@ -40041,9 +40041,9 @@ pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si))] +#[cfg_attr(test, assert_instr(vcvttsd2si))] pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { - vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
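[review note, not part of the diff] On the `_mm_cvtt_round*` variants the `SAE` const parameter only controls exception suppression, since truncation already fixes the rounding direction; `static_assert_sae!` restricts it to `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`. A hedged usage sketch:

    // Truncate the low f64 lane to i32 without raising FP exceptions.
    #[target_feature(enable = "avx512f")]
    unsafe fn sae_sketch(a: __m128d) -> i32 {
        _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a)
    }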
@@ -40052,9 +40052,9 @@ pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi))] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { - vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -41565,6 +41565,16 @@ extern "C" { #[link_name = "llvm.x86.avx512.cvtusi642sd"] fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.cvttss2si"] + fn vcvttss2si(a: f32x4, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttss2usi"] + fn vcvttss2usi(a: f32x4, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.cvttsd2si"] + fn vcvttsd2si(a: f64x2, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttsd2usi"] + fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32; + #[link_name = "llvm.x86.avx512.vcomi.ss"] fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32; #[link_name = "llvm.x86.avx512.vcomi.sd"] @@ -52766,20 +52776,14 @@ mod tests { unsafe fn test_mm512_castps128_ps512() { let a = _mm_setr_ps(17., 18., 19., 20.); let r = _mm512_castps128_ps512(a); - let e = _mm512_setr_ps( - 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., - ); - assert_eq_m512(r, e); + assert_eq_m128(_mm512_castps512_ps128(r), a); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_castps256_ps512() { let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.); let r = _mm512_castps256_ps512(a); - let e = _mm512_setr_ps( - 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1., - ); - assert_eq_m512(r, e); + assert_eq_m256(_mm512_castps512_ps256(r), a); } #[simd_test(enable = "avx512f")] diff --git a/crates/core_arch/src/x86/gfni.rs b/crates/core_arch/src/x86/gfni.rs index 84dcf4203e..6c7f7a2f17 100644 --- a/crates/core_arch/src/x86/gfni.rs +++ b/crates/core_arch/src/x86/gfni.rs @@ -65,7 +65,7 @@ extern "C" { /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i { @@ -243,7 +243,7 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _ /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(2)] @@ -473,7 +473,7 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8( /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(2)] @@ -877,7 +877,7 @@ mod tests { _mm512_loadu_si512(black_box(pointer)) } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -927,7 +927,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -977,7 +977,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -1027,7 +1027,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1104,7 +1104,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1181,7 +1181,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1257,7 +1257,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1345,7 +1345,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1433,7 +1433,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 9a8a4844ce..a4602301de 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1866,6 +1866,7 @@ pub const _MM_HINT_ET1: i32 = 6; #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_prefetch(p: *const i8) { + static_assert_uimm_bits!(STRATEGY, 3); // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache). // `locality` and `rw` are based on our `STRATEGY`. 
prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1); diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index bdc94de3b7..4c3ac546d1 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2444,7 +2444,7 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected +#[cfg_attr(test, assert_instr(xorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs index 77da2601fe..37f8ef50f8 100644 --- a/crates/core_arch/src/x86_64/avx.rs +++ b/crates/core_arch/src/x86_64/avx.rs @@ -29,11 +29,24 @@ pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m25 transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) } +/// Extracts a 64-bit integer from `a`, selected with `INDEX`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64) +#[inline] +#[target_feature(enable = "avx")] +#[rustc_legacy_const_generics(1)] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 { + static_assert_uimm_bits!(INDEX, 2); + simd_extract!(a.as_i64x4(), INDEX as u32) +} + #[cfg(test)] mod tests { use stdarch_test::simd_test; - use crate::core_arch::x86::*; + use crate::core_arch::arch::x86_64::*; #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi64() { @@ -42,4 +55,11 @@ mod tests { let e = _mm256_setr_epi64x(1, 2, 3, 0); assert_eq_m256i(r, e); } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi64() { + let a = _mm256_setr_epi64x(0, 1, 2, 3); + let r = _mm256_extract_epi64::<3>(a); + assert_eq!(r, 3); + } } diff --git a/crates/core_arch/src/x86_64/avx2.rs b/crates/core_arch/src/x86_64/avx2.rs deleted file mode 100644 index fb4ea2a56a..0000000000 --- a/crates/core_arch/src/x86_64/avx2.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Advanced Vector Extensions 2 (AVX) -//! -//! AVX2 expands most AVX commands to 256-bit wide vector registers and -//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). -//! -//! The references are: -//! -//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: -//! Instruction Set Reference, A-Z][intel64_ref]. -//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and -//! System Instructions][amd64_ref]. -//! -//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick -//! overview of the instructions available. -//! -//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf -//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions -//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate -use crate::core_arch::x86::*; -use crate::intrinsics::simd::*; -/// Extracts a 64-bit integer from `a`, selected with `INDEX`.
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64) -#[inline] -#[target_feature(enable = "avx2")] -#[rustc_legacy_const_generics(1)] -// This intrinsic has no corresponding instruction. -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 { - static_assert_uimm_bits!(INDEX, 2); - simd_extract!(a.as_i64x4(), INDEX as u32) -} - -#[cfg(test)] -mod tests { - use crate::core_arch::arch::x86_64::*; - use stdarch_test::simd_test; - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_extract_epi64() { - let a = _mm256_setr_epi64x(0, 1, 2, 3); - let r = _mm256_extract_epi64::<3>(a); - assert_eq!(r, 3); - } -} diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index aa501bec3d..9953206c88 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -105,9 +105,9 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si))] +#[cfg_attr(test, assert_instr(vcvttsd2si))] pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { - vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -116,9 +116,9 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi))] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { - vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. @@ -127,9 +127,9 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si))] +#[cfg_attr(test, assert_instr(vcvttss2si))] pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { - vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -138,9 +138,9 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi))] +#[cfg_attr(test, assert_instr(vcvttss2usi))] pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { - vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
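[review note, not part of the diff] The x86_64 file receives the same truncation fix for the 64-bit result widths; these variants matter when the truncated value cannot fit in 32 bits. A short sketch (demo function, not in the diff):

    #[target_feature(enable = "avx512f")]
    unsafe fn cvtt64_sketch() {
        // 4e10 overflows an i32 but truncates exactly into an i64.
        let x = _mm_set_sd(4.0e10);
        assert_eq!(_mm_cvttsd_i64(x), 40_000_000_000);
    }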
@@ -396,12 +396,12 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si64(a, SAE) + vcvttsd2si64(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -411,12 +411,12 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si64(a, SAE) + vcvttsd2si64(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -426,12 +426,12 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2usi64(a, SAE) + vcvttsd2usi64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -441,12 +441,12 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si64(a, SAE) + vcvttss2si64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -456,12 +456,12 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si64(a, SAE) + vcvttss2si64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -471,12 +471,12 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { static_assert_sae!(SAE); let a = 
@@ -498,6 +498,15 @@ extern "C" {
     fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
     #[link_name = "llvm.x86.avx512.cvtusi642sd"]
     fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2;
+
+    #[link_name = "llvm.x86.avx512.cvttss2si64"]
+    fn vcvttss2si64(a: f32x4, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.cvttss2usi64"]
+    fn vcvttss2usi64(a: f32x4, rounding: i32) -> u64;
+    #[link_name = "llvm.x86.avx512.cvttsd2si64"]
+    fn vcvttsd2si64(a: f64x2, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.cvttsd2usi64"]
+    fn vcvttsd2usi64(a: f64x2, rounding: i32) -> u64;
 }

 #[cfg(test)]
@@ -9840,16 +9849,14 @@ mod tests {
     unsafe fn test_mm512_castpd128_pd512() {
         let a = _mm_setr_pd(17., 18.);
         let r = _mm512_castpd128_pd512(a);
-        let e = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.);
-        assert_eq_m512d(r, e);
+        assert_eq_m128d(_mm512_castpd512_pd128(r), a);
     }

     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_castpd256_pd512() {
         let a = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_castpd256_pd512(a);
-        let e = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.);
-        assert_eq_m512d(r, e);
+        assert_eq_m256d(_mm512_castpd512_pd256(r), a);
     }

     #[simd_test(enable = "avx512f")]
@@ -9910,16 +9917,14 @@ mod tests {
     unsafe fn test_mm512_castsi128_si512() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_castsi128_si512(a);
-        let e = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1);
-        assert_eq_m512i(r, e);
+        assert_eq_m128i(_mm512_castsi512_si128(r), a);
     }

     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_castsi256_si512() {
         let a = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_castsi256_si512(a);
-        let e = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1);
-        assert_eq_m512i(r, e);
+        assert_eq_m256i(_mm512_castsi512_si256(r), a);
     }

     #[simd_test(enable = "avx512f")]
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 708dc90823..ff46373d90 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -42,10 +42,6 @@ mod bmi2;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub use self::bmi2::*;

-mod avx2;
-#[stable(feature = "simd_x86", since = "1.27.0")]
-pub use self::avx2::*;
-
 mod avx512f;
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub use self::avx512f::*;
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index 1eb939abcd..ff31c31c89 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -196,7 +196,6 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
         "_MM_MANTISSA_NORM_ENUM" => quote! { &MM_MANTISSA_NORM_ENUM },
         "_MM_MANTISSA_SIGN_ENUM" => quote! { &MM_MANTISSA_SIGN_ENUM },
         "_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM },
-        "__m64" => quote! { &M64 },
         "bool" => quote! { &BOOL },
         "f32" => quote! { &F32 },
         "f64" => quote! { &F64 },
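The cast tests rewritten in the avx512f hunks above stop asserting concrete values for the upper lanes, which _mm512_castpd128_pd512 and its siblings leave undefined; instead they round-trip through the narrowing cast and compare only the defined lanes. The same discipline in user code might look like this sketch (illustrative only, same nightly assumptions; `lower_lanes_survive_cast` is a hypothetical helper):

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lower_lanes_survive_cast(a: __m128d) -> bool {
    let wide = _mm512_castpd128_pd512(a); // upper 384 bits are undefined
    let back = _mm512_castpd512_pd128(wide); // recover the defined 128 bits
    // _mm_movemask_pd yields one bit per f64 lane; 0b11 means both lanes match.
    _mm_movemask_pd(_mm_cmpeq_pd(back, a)) == 0b11
}

fn main() {
    // Assumes AVX-512F is available on the running CPU.
    assert!(unsafe { lower_lanes_survive_cast(_mm_setr_pd(17., 18.)) });
}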
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index c5773a0984..250b0cf720 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -11,7 +11,7 @@
     clippy::useless_format
 )]

-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fs::File;
 use std::io;
 use std::io::{BufWriter, Write};
@@ -46,7 +46,6 @@ static U64: Type = Type::PrimUnsigned(64);
 static U128: Type = Type::PrimUnsigned(128);
 static ORDERING: Type = Type::Ordering;

-static M64: Type = Type::M64;
 static M128: Type = Type::M128;
 static M128BH: Type = Type::M128BH;
 static M128I: Type = Type::M128I;
@@ -79,7 +78,6 @@ enum Type {
     PrimUnsigned(u8),
     MutPtr(&'static Type),
     ConstPtr(&'static Type),
-    M64,
     M128,
     M128BH,
     M128D,
@@ -182,17 +180,12 @@ fn verify_all_signatures() {
         if !rust.has_test {
             // FIXME: this list should be almost empty
             let skip = [
+                // EFLAGS
                 "__readeflags",
                 "__readeflags",
                 "__writeeflags",
                 "__writeeflags",
-                "_mm_comige_ss",
-                "_mm_cvt_ss2si",
-                "_mm_cvtt_ss2si",
-                "_mm_cvt_si2ss",
-                "_mm_set_ps1",
-                "_mm_load_ps1",
-                "_mm_store_ps1",
+                // MXCSR - deprecated
                 "_mm_getcsr",
                 "_mm_setcsr",
                 "_MM_GET_EXCEPTION_MASK",
@@ -203,26 +196,11 @@ fn verify_all_signatures() {
                 "_MM_SET_EXCEPTION_STATE",
                 "_MM_SET_FLUSH_ZERO_MODE",
                 "_MM_SET_ROUNDING_MODE",
-                "_mm_prefetch",
-                "_mm_undefined_ps",
-                "_m_pmaxsw",
-                "_m_pmaxub",
-                "_m_pminsw",
-                "_m_pminub",
-                "_m_pavgb",
-                "_m_pavgw",
-                "_m_psadbw",
-                "_mm_cvt_pi2ps",
-                "_m_maskmovq",
-                "_m_pextrw",
-                "_m_pinsrw",
-                "_m_pmovmskb",
-                "_m_pshufw",
-                "_mm_cvtt_ps2pi",
-                "_mm_cvt_ps2pi",
+                // CPUID
                 "__cpuid_count",
                 "__cpuid",
                 "__get_cpuid_max",
+                // Privileged
                 "_xsave",
                 "_xrstor",
                 "_xsetbv",
@@ -231,59 +209,48 @@ fn verify_all_signatures() {
                 "_xsavec",
                 "_xsaves",
                 "_xrstors",
-                "_mm_bslli_si128",
-                "_mm_bsrli_si128",
-                "_mm_undefined_pd",
-                "_mm_undefined_si128",
-                "_mm_cvtps_ph",
-                "_mm256_cvtps_ph",
+                "_xsave64",
+                "_xrstor64",
+                "_xsaveopt64",
+                "_xsavec64",
+                "_xsaves64",
+                "_xrstors64",
+                "_fxsave",
+                "_fxrstor",
+                "_fxsave64",
+                "_fxrstor64",
+                // TSC
                 "_rdtsc",
                 "__rdtscp",
-                "_mm256_castps128_ps256",
-                "_mm256_castpd128_pd256",
-                "_mm256_castsi128_si256",
-                "_mm256_undefined_ps",
-                "_mm256_undefined_pd",
-                "_mm256_undefined_si256",
-                "_bextr2_u32",
-                "_mm_tzcnt_32",
-                "_m_paddb",
-                "_m_paddw",
-                "_m_paddd",
-                "_m_paddsb",
-                "_m_paddsw",
-                "_m_paddusb",
-                "_m_paddusw",
-                "_m_psubb",
-                "_m_psubw",
-                "_m_psubd",
-                "_m_psubsb",
-                "_m_psubsw",
-                "_m_psubusb",
-                "_m_psubusw",
-                "_mm_set_pi16",
-                "_mm_set_pi32",
-                "_mm_set_pi8",
-                "_mm_set1_pi16",
-                "_mm_set1_pi32",
-                "_mm_set1_pi8",
-                "_mm_setr_pi16",
-                "_mm_setr_pi32",
-                "_mm_setr_pi8",
-                "_mm_min_epi8",
-                "_mm_min_epi32",
+                // TBM
+                "_t1mskc_u64",
+                // RTM
                 "_xbegin",
                 "_xend",
+                // RDRAND
                 "_rdrand16_step",
                 "_rdrand32_step",
+                "_rdrand64_step",
                 "_rdseed16_step",
                 "_rdseed32_step",
-                "_fxsave",
-                "_fxrstor",
-                "_t1mskc_u64",
+                "_rdseed64_step",
+                // Prefetch
+                "_mm_prefetch",
+                // CMPXCHG
+                "cmpxchg16b",
+                // Undefined
+                "_mm_undefined_ps",
+                "_mm_undefined_pd",
+                "_mm_undefined_si128",
+                "_mm256_undefined_ps",
+                "_mm256_undefined_pd",
+                "_mm256_undefined_si256",
+                "_mm512_undefined_ps",
+                "_mm512_undefined_pd",
+                "_mm512_undefined_epi32",
+                "_mm512_undefined",
+                // Has doc-tests instead
                 "_mm256_shuffle_epi32",
-                "_mm256_bslli_epi128",
-                "_mm256_bsrli_epi128",
                 "_mm256_unpackhi_epi8",
                 "_mm256_unpacklo_epi8",
                 "_mm256_unpackhi_epi16",
@@ -292,26 +259,31 @@
"_mm256_unpacklo_epi32", "_mm256_unpackhi_epi64", "_mm256_unpacklo_epi64", - "_xsave64", - "_xrstor64", - "_xsaveopt64", - "_xsavec64", - "_xsaves64", - "_xrstors64", + // Has tests with different name + "_mm_min_epi8", + "_mm_min_epi32", + // Needs `f16` to test + "_mm_cvtps_ph", + "_mm256_cvtps_ph", + // Aliases + "_mm_comige_ss", + "_mm_cvt_ss2si", + "_mm_cvtt_ss2si", + "_mm_cvt_si2ss", + "_mm_set_ps1", + "_mm_load_ps1", + "_mm_store_ps1", + "_mm_bslli_si128", + "_mm_bsrli_si128", + "_bextr2_u32", + "_mm_tzcnt_32", + "_mm256_bslli_epi128", + "_mm256_bsrli_epi128", "_mm_cvtsi64x_si128", "_mm_cvtsi128_si64x", "_mm_cvtsi64x_sd", - "cmpxchg16b", - "_rdrand64_step", - "_rdseed64_step", "_bextr2_u64", "_mm_tzcnt_64", - "_fxsave64", - "_fxrstor64", - "_mm512_undefined_ps", - "_mm512_undefined_pd", - "_mm512_undefined_epi32", - "_mm512_undefined", ]; if !skip.contains(&rust.name) { println!( @@ -465,6 +437,14 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { } } + let rust_features: HashSet = match rust.target_feature { + Some(features) => features + .split(',') + .map(|feature| feature.to_string()) + .collect(), + None => HashSet::new(), + }; + for cpuid in &intel.cpuid { // The pause intrinsic is in the SSE2 module, but it is backwards // compatible with CPUs without SSE2, and it therefore does not need the @@ -539,15 +519,11 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { }; let fixed_cpuid = fixup_cpuid(cpuid); - let rust_feature = rust - .target_feature - .unwrap_or_else(|| panic!("no target feature listed for {}", rust.name)); - - if !rust_feature.contains(&fixed_cpuid) { + if !rust_features.contains(&fixed_cpuid) { bail!( - "intel cpuid `{}` not in `{}` for {}", + "intel cpuid `{}` not in `{:?}` for {}", fixed_cpuid, - rust_feature, + rust_features, rust.name ); } @@ -750,7 +726,6 @@ fn equate( ) => {} (&Type::PrimUnsigned(64), "unsigned __int64") => {} - (&Type::M64, "__m64") => {} (&Type::M128, "__m128") => {} (&Type::M128BH, "__m128bh") => {} (&Type::M128I, "__m128i") => {} @@ -784,7 +759,6 @@ fn equate( (&Type::MutPtr(&Type::MMASK64), "__mmask64*") => {} (&Type::MutPtr(&Type::MMASK16), "__mmask16*") => {} - (&Type::MutPtr(&Type::M64), "__m64*") => {} (&Type::MutPtr(&Type::M128), "__m128*") => {} (&Type::MutPtr(&Type::M128BH), "__m128bh*") => {} (&Type::MutPtr(&Type::M128I), "__m128i*") => {} @@ -808,7 +782,6 @@ fn equate( (&Type::ConstPtr(&Type::PrimUnsigned(32)), "unsigned int const*") => {} (&Type::ConstPtr(&Type::PrimUnsigned(64)), "unsigned __int64 const*") => {} - (&Type::ConstPtr(&Type::M64), "__m64 const*") => {} (&Type::ConstPtr(&Type::M128), "__m128 const*") => {} (&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {} (&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}