diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs index db17be9f21..aa5a5d8c18 100644 --- a/crates/core_arch/src/x86/avx.rs +++ b/crates/core_arch/src/x86/avx.rs @@ -52,9 +52,8 @@ pub unsafe fn _mm256_add_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_and_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: Should be 'vandpd' instruction. // See https://github.com/rust-lang/stdarch/issues/71 -#[cfg_attr(test, assert_instr(vandps))] +#[cfg_attr(test, assert_instr(vandp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -82,9 +81,8 @@ pub unsafe fn _mm256_and_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_or_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: should be `vorpd` instruction. // See . -#[cfg_attr(test, assert_instr(vorps))] +#[cfg_attr(test, assert_instr(vorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -162,8 +160,7 @@ pub unsafe fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_andnot_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME: should be `vandnpd` instruction. -#[cfg_attr(test, assert_instr(vandnps))] +#[cfg_attr(test, assert_instr(vandnp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -615,8 +612,7 @@ pub unsafe fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_xor_pd) #[inline] #[target_feature(enable = "avx")] -// FIXME Should be 'vxorpd' instruction. -#[cfg_attr(test, assert_instr(vxorps))] +#[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d { let a: u64x4 = transmute(a); @@ -995,6 +991,29 @@ pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i { transmute(dst) } +/// Extracts a 32-bit integer from `a`, selected with `INDEX`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) +#[inline] +#[target_feature(enable = "avx")] +// This intrinsic has no corresponding instruction. +#[rustc_legacy_const_generics(1)] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { + static_assert_uimm_bits!(INDEX, 3); + simd_extract!(a.as_i32x8(), INDEX as u32) +} + +/// Returns the first element of the input vector of `[8 x i32]`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) +#[inline] +#[target_feature(enable = "avx")] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { + simd_extract!(a.as_i32x8(), 0) +} + /// Zeroes the contents of all XMM or YMM registers.
/// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_zeroall) @@ -1378,7 +1397,7 @@ pub unsafe fn _mm256_insert_epi32(a: __m256i, i: i32) -> __m25 /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_load_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[cfg_attr(test, assert_instr(vmovap))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { @@ -1393,7 +1412,7 @@ pub unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_store_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovaps))] // FIXME vmovapd expected +#[cfg_attr(test, assert_instr(vmovap))] #[stable(feature = "simd_x86", since = "1.27.0")] #[allow(clippy::cast_ptr_alignment)] pub unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d) { @@ -1437,7 +1456,7 @@ pub unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256) { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_loadu_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[cfg_attr(test, assert_instr(vmovup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { let mut dst = _mm256_undefined_pd(); @@ -1456,7 +1475,7 @@ pub unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_storeu_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vmovups))] // FIXME vmovupd expected +#[cfg_attr(test, assert_instr(vmovup))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d) { mem_addr.cast::<__m256d>().write_unaligned(a); @@ -2145,7 +2164,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_setzero_pd) #[inline] #[target_feature(enable = "avx")] -#[cfg_attr(test, assert_instr(vxorps))] // FIXME vxorpd expected +#[cfg_attr(test, assert_instr(vxorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_setzero_pd() -> __m256d { _mm256_set1_pd(0.0) @@ -2676,8 +2695,7 @@ pub unsafe fn _mm256_castsi256_si128(a: __m256i) -> __m128i { // instructions, thus it has zero latency. #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { - // FIXME simd_shuffle!(a, a, [0, 1, 2, 3, -1, -1, -1, -1]) - simd_shuffle!(a, a, [0, 1, 2, 3, 0, 0, 0, 0]) + simd_shuffle!(a, _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Casts vector of type __m128d to type __m256d; @@ -2690,8 +2708,7 @@ pub unsafe fn _mm256_castps128_ps256(a: __m128) -> __m256 { // instructions, thus it has zero latency. 
#[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { - // FIXME simd_shuffle!(a, a, [0, 1, -1, -1]) - simd_shuffle!(a, a, [0, 1, 0, 0]) + simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2]) } /// Casts vector of type __m128i to type __m256i; @@ -2705,8 +2722,8 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d { #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i { let a = a.as_i64x2(); - // FIXME simd_shuffle!(a, a, [0, 1, -1, -1]) - let dst: i64x4 = simd_shuffle!(a, a, [0, 1, 0, 0]); + let undefined = _mm_undefined_si128().as_i64x2(); + let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]); transmute(dst) } @@ -3719,6 +3736,22 @@ mod tests { assert_eq_m128i(r, e); } + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi32() { + let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); + let r1 = _mm256_extract_epi32::<0>(a); + let r2 = _mm256_extract_epi32::<3>(a); + assert_eq!(r1, -1); + assert_eq!(r2, 3); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_cvtsi256_si32() { + let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_cvtsi256_si32(a); + assert_eq!(r, 1); + } + #[simd_test(enable = "avx")] #[cfg_attr(miri, ignore)] // Register-level operation not supported by Miri unsafe fn test_mm256_zeroall() { @@ -4698,6 +4731,27 @@ mod tests { assert_eq_m128i(r, _mm_setr_epi64x(1, 2)); } + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castps128_ps256() { + let a = _mm_setr_ps(1., 2., 3., 4.); + let r = _mm256_castps128_ps256(a); + assert_eq_m128(_mm256_castps256_ps128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castpd128_pd256() { + let a = _mm_setr_pd(1., 2.); + let r = _mm256_castpd128_pd256(a); + assert_eq_m128d(_mm256_castpd256_pd128(r), a); + } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_castsi128_si256() { + let a = _mm_setr_epi32(1, 2, 3, 4); + let r = _mm256_castsi128_si256(a); + assert_eq_m128i(_mm256_castsi256_si128(r), a); + } + #[simd_test(enable = "avx")] unsafe fn test_mm256_zextps128_ps256() { let a = _mm_setr_ps(1., 2., 3., 4.); diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs index f3dd0c8e4a..faaee030e0 100644 --- a/crates/core_arch/src/x86/avx2.rs +++ b/crates/core_arch/src/x86/avx2.rs @@ -3610,29 +3610,6 @@ pub unsafe fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 { simd_extract!(a.as_u16x16(), INDEX as u32, u16) as i32 } -/// Extracts a 32-bit integer from `a`, selected with `INDEX`. -/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi32) -#[inline] -#[target_feature(enable = "avx2")] -// This intrinsic has no corresponding instruction. -#[rustc_legacy_const_generics(1)] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi32<const INDEX: i32>(a: __m256i) -> i32 { - static_assert_uimm_bits!(INDEX, 3); - simd_extract!(a.as_i32x8(), INDEX as u32) -} - -/// Returns the first element of the input vector of `[8 x i32]`.
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_cvtsi256_si32) -#[inline] -#[target_feature(enable = "avx2")] -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_cvtsi256_si32(a: __m256i) -> i32 { - simd_extract!(a.as_i32x8(), 0) -} - #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.avx2.phadd.w"] @@ -5749,20 +5726,4 @@ mod tests { assert_eq!(r1, 0xFFFF); assert_eq!(r2, 3); } - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_extract_epi32() { - let a = _mm256_setr_epi32(-1, 1, 2, 3, 4, 5, 6, 7); - let r1 = _mm256_extract_epi32::<0>(a); - let r2 = _mm256_extract_epi32::<3>(a); - assert_eq!(r1, -1); - assert_eq!(r2, 3); - } - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_cvtsi256_si32() { - let a = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); - let r = _mm256_cvtsi256_si32(a); - assert_eq!(r, 1); - } } diff --git a/crates/core_arch/src/x86/avx512dq.rs b/crates/core_arch/src/x86/avx512dq.rs index 08018bef86..ac88f0eca7 100644 --- a/crates/core_arch/src/x86/avx512dq.rs +++ b/crates/core_arch/src/x86/avx512dq.rs @@ -68,7 +68,7 @@ pub unsafe fn _mm256_maskz_and_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_and_pd&ig_expand=293) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandps))] // FIXME: should be `vandpd` instruction. +#[cfg_attr(test, assert_instr(vandp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_and_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_and(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -267,7 +267,7 @@ pub unsafe fn _mm256_maskz_andnot_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_andnot_pd&ig_expand=331) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vandnps))] // FIXME: should be `vandnpd` instruction. +#[cfg_attr(test, assert_instr(vandnp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_andnot_pd(a: __m512d, b: __m512d) -> __m512d { _mm512_and_pd(_mm512_xor_pd(a, transmute(_mm512_set1_epi64(-1))), b) @@ -465,7 +465,7 @@ pub unsafe fn _mm256_maskz_or_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_or_pd&ig_expand=4829) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vorps))] // FIXME: should be `vorpd` instruction. +#[cfg_attr(test, assert_instr(vorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_or_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_or(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -662,7 +662,7 @@ pub unsafe fn _mm256_maskz_xor_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256 /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_xor_pd&ig_expand=7102) #[inline] #[target_feature(enable = "avx512dq")] -#[cfg_attr(test, assert_instr(vxorps))] // FIXME: should be `vxorpd` instruction. 
+#[cfg_attr(test, assert_instr(vxorp))] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_xor_pd(a: __m512d, b: __m512d) -> __m512d { transmute(simd_xor(transmute::<_, u64x8>(a), transmute::<_, u64x8>(b))) @@ -5211,7 +5211,7 @@ pub unsafe fn _mm512_maskz_range_ps(k: __mmask16, a: __m512, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_sd&ig_expand=5216) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5236,7 +5236,7 @@ pub unsafe fn _mm_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_sd&ig_expand=5214) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5270,7 +5270,7 @@ pub unsafe fn _mm_mask_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_sd&ig_expand=5215) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5295,7 +5295,7 @@ pub unsafe fn _mm_maskz_range_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_sd&ig_expand=5220) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5327,7 +5327,7 @@ pub unsafe fn _mm_mask_range_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_sd&ig_expand=5221) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangesd, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5347,7 +5347,7 @@ pub unsafe fn _mm_maskz_range_sd(k: __mmask8, a: __m128d, b: __ /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_range_round_ss&ig_expand=5219) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5369,7 +5369,7 @@ pub unsafe fn _mm_range_round_ss(a: __m128, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_round_ss&ig_expand=5217) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5403,7 
+5403,7 @@ pub unsafe fn _mm_mask_range_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_round_ss&ig_expand=5218) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5428,7 +5428,7 @@ pub unsafe fn _mm_maskz_range_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_range_ss&ig_expand=5222) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -5460,7 +5460,7 @@ pub unsafe fn _mm_mask_range_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_range_ss&ig_expand=5223) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vrangess, IMM8 = 5))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6070,7 +6070,7 @@ pub unsafe fn _mm512_maskz_reduce_ps(k: __mmask16, a: __m512) - /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_sd&ig_expand=5447) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6099,7 +6099,7 @@ pub unsafe fn _mm_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_sd&ig_expand=5445) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6137,7 +6137,7 @@ pub unsafe fn _mm_mask_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_sd&ig_expand=5446) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6165,7 +6165,7 @@ pub unsafe fn _mm_maskz_reduce_round_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_sd&ig_expand=5456) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6188,7 +6188,7 @@ pub unsafe fn _mm_reduce_sd(a: __m128d, b: __m128d) -> __m128d /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_sd&ig_expand=5454) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] 
+#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6223,7 +6223,7 @@ pub unsafe fn _mm_mask_reduce_sd( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_sd&ig_expand=5455) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducesd, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6248,7 +6248,7 @@ pub unsafe fn _mm_maskz_reduce_sd(k: __mmask8, a: __m128d, b: _ /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_round_ss&ig_expand=5453) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(2, 3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6274,7 +6274,7 @@ pub unsafe fn _mm_reduce_round_ss(a: __m128, b: /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_round_ss&ig_expand=5451) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(4, 5)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6312,7 +6312,7 @@ pub unsafe fn _mm_mask_reduce_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_round_ss&ig_expand=5452) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0, SAE = 8))] #[rustc_legacy_const_generics(3, 4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6340,7 +6340,7 @@ pub unsafe fn _mm_maskz_reduce_round_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_reduce_ss&ig_expand=5462) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6363,7 +6363,7 @@ pub unsafe fn _mm_reduce_ss(a: __m128, b: __m128) -> __m128 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_reduce_ss&ig_expand=5460) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6398,7 +6398,7 @@ pub unsafe fn _mm_mask_reduce_ss( /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_reduce_ss&ig_expand=5461) #[inline] -#[target_feature(enable = "avx512dq,avx512vl")] +#[target_feature(enable = "avx512dq")] #[cfg_attr(test, assert_instr(vreducess, IMM8 = 0))] #[rustc_legacy_const_generics(3)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6745,7 +6745,7 @@ pub unsafe fn _mm512_mask_fpclass_ps_mask(k1: __mmask16, a: __m /// 
/// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fpclass_sd_mask&ig_expand=3511) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6770,7 +6770,7 @@ pub unsafe fn _mm_fpclass_sd_mask(a: __m128d) -> __mmask8 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_sd_mask&ig_expand=3512) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclasssd, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6794,7 +6794,7 @@ pub unsafe fn _mm_mask_fpclass_sd_mask(k1: __mmask8, a: __m128d /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_fpclass_ss_mask&ig_expand=3515) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(1)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -6819,7 +6819,7 @@ pub unsafe fn _mm_fpclass_ss_mask(a: __m128) -> __mmask8 { /// /// [Intel's Documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_fpclass_ss_mask&ig_expand=3516) #[inline] -#[target_feature(enable = "sse,avx512f,avx512dq,avx512vl")] +#[target_feature(enable = "sse,avx512f,avx512dq")] #[cfg_attr(test, assert_instr(vfpclassss, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] @@ -10052,7 +10052,7 @@ mod tests { assert_eq_m512(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10061,7 +10061,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10071,7 +10071,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_round_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10080,7 +10080,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10090,7 +10090,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_sd() { let a = _mm_set_sd(1.); let b = _mm_set_sd(2.); @@ -10099,7 +10099,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10108,7 +10108,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ 
-10118,7 +10118,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_round_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10127,7 +10127,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_range_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10137,7 +10137,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_range_ss() { let a = _mm_set_ss(1.); let b = _mm_set_ss(2.); @@ -10391,7 +10391,7 @@ mod tests { assert_eq_m512(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10400,7 +10400,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10412,7 +10412,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_round_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10422,7 +10422,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10431,7 +10431,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10441,7 +10441,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_sd() { let a = _mm_set_pd(1., 2.); let b = _mm_set_sd(0.25); @@ -10450,7 +10450,7 @@ mod tests { assert_eq_m128d(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10459,7 +10459,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10471,7 +10471,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_round_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10481,7 +10481,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10490,7 +10490,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10500,7 +10500,7 @@ mod tests { assert_eq_m128(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_maskz_reduce_ss() { let a = _mm_set_ps(1., 2., 3., 4.); let b = _mm_set_ss(0.25); @@ -10675,7 
+10675,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_fpclass_sd_mask() { let a = _mm_set_pd(1., f64::INFINITY); let r = _mm_fpclass_sd_mask::<0x18>(a); @@ -10683,7 +10683,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_fpclass_sd_mask() { let a = _mm_set_sd(f64::INFINITY); let r = _mm_mask_fpclass_sd_mask::<0x18>(0b0, a); @@ -10691,7 +10691,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_fpclass_ss_mask() { let a = _mm_set_ss(f32::INFINITY); let r = _mm_fpclass_ss_mask::<0x18>(a); @@ -10699,7 +10699,7 @@ mod tests { assert_eq!(r, e); } - #[simd_test(enable = "avx512dq,avx512vl")] + #[simd_test(enable = "avx512dq")] unsafe fn test_mm_mask_fpclass_ss_mask() { let a = _mm_set_ss(f32::INFINITY); let r = _mm_mask_fpclass_ss_mask::<0x18>(0b0, a); diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs index 286e1e1517..309a315039 100644 --- a/crates/core_arch/src/x86/avx512f.rs +++ b/crates/core_arch/src/x86/avx512f.rs @@ -25301,7 +25301,7 @@ pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m1 pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { simd_shuffle!( a, - _mm_set1_ps(-1.), + _mm_undefined_ps(), [0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4], ) } @@ -25315,7 +25315,7 @@ pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 { pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 { simd_shuffle!( a, - _mm256_set1_ps(-1.), + _mm256_undefined_ps(), [0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8], ) } @@ -25375,7 +25375,7 @@ pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { - transmute(a.as_m512()) + transmute(a) } /// Cast vector of type __m512 to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25385,7 +25385,7 @@ pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { - transmute(a.as_m512()) + transmute(a) } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25395,7 +25395,7 @@ pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { - simd_shuffle!(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256d to type __m512d; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
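[review note, not part of the diff] The cast hunks in this region all replace the second `simd_shuffle!` operand (`_mm_set1_pd(-1.)` and friends) with an `_mm*_undefined_*` vector. Shuffle indices count across the concatenation of both operands, so for a 2-lane `a` the indices 0 and 1 select lanes of `a`, and index 2 selects lane 0 of the second operand. A minimal sketch of the pattern, assuming the crate-internal `simd_shuffle!` macro is in scope as it is in avx512f.rs:

    #[target_feature(enable = "avx512f")]
    unsafe fn widen_pd_sketch(a: __m128d) -> __m512d {
        // Lanes 0-1 come from `a`; every index >= 2 reads the undefined
        // operand, so the upper 384 bits stay unspecified and no -1.0
        // constant has to be materialized.
        simd_shuffle!(a, _mm_undefined_pd(), [0, 1, 2, 2, 2, 2, 2, 2])
    }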
@@ -25405,7 +25405,7 @@ pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d { - simd_shuffle!(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle!(a, _mm256_undefined_pd(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128d to type __m512d; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25455,7 +25455,7 @@ pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { - transmute(a.as_m512d()) + transmute(a) } /// Cast vector of type __m512d to type __m512i. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25465,7 +25465,7 @@ pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { - transmute(a.as_m512d()) + transmute(a) } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25475,7 +25475,7 @@ pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { - simd_shuffle!(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2]) + simd_shuffle!(a, _mm_undefined_si128(), [0, 1, 2, 2, 2, 2, 2, 2]) } /// Cast vector of type __m256i to type __m512i; the upper 256 bits of the result are undefined. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. @@ -25485,7 +25485,7 @@ pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i { #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i { - simd_shuffle!(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4]) + simd_shuffle!(a, _mm256_undefined_si256(), [0, 1, 2, 3, 4, 4, 4, 4]) } /// Cast vector of type __m128i to type __m512i; the upper 384 bits of the result are zeroed. This intrinsic is only used for compilation and does not generate any instructions, thus it has zero latency. 
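[review note, not part of the diff] The next hunk truncates `vbroadcas` to `vbroadcast` for the same reason the earlier hunks use `vandp`, `vorp`, `vxorp`, and `vmovap`: `assert_instr` appears to match the expected mnemonic as a prefix of the disassembled instruction, so one truncated name accepts whichever concrete encoding LLVM picks. A hedged sketch of the idea (demo function, not in the diff):

    // `vandp` matches both `vandps` and `vandpd`, so the assertion no longer
    // fails when LLVM lowers the pd intrinsic to the equivalent ps form.
    #[cfg_attr(test, assert_instr(vandp))]
    #[target_feature(enable = "avx")]
    unsafe fn and_pd_sketch(a: __m256d, b: __m256d) -> __m256d {
        _mm256_and_pd(a, b)
    }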
@@ -25654,7 +25654,7 @@ pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq +#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastq pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i { simd_shuffle!(a, a, [0, 0, 0, 0, 0, 0, 0, 0]) } @@ -39930,12 +39930,12 @@ pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si(a, SAE) + vcvttss2si(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39945,12 +39945,12 @@ pub unsafe fn _mm_cvtt_roundss_si32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si(a, SAE) + vcvttss2si(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -39960,12 +39960,12 @@ pub unsafe fn _mm_cvtt_roundss_i32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2usi(a, SAE) + vcvttss2usi(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. @@ -39974,9 +39974,9 @@ pub unsafe fn _mm_cvtt_roundss_u32(a: __m128) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si))] +#[cfg_attr(test, assert_instr(vcvttss2si))] pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { - vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
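[review note, not part of the diff] The `vcvtss2si` to `vcvttss2si` changes are not only about the test assertions: the extra `t` selects the truncating LLVM intrinsic, so the old bindings rounded according to the current MXCSR mode instead of truncating. A small sketch of the observable difference (demo function, not in the diff):

    #[target_feature(enable = "avx512f")]
    unsafe fn truncation_sketch() {
        let x = _mm_set_ss(2.7);
        // Under the default round-to-nearest mode the rounding conversion
        // yields 3, while the truncating one always chops toward zero.
        assert_eq!(_mm_cvtss_si32(x), 3);
        assert_eq!(_mm_cvttss_i32(x), 2);
    }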
@@ -39985,9 +39985,9 @@ pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi))] +#[cfg_attr(test, assert_instr(vcvttss2usi))] pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { - vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -39997,12 +39997,12 @@ pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si(a, SAE) + vcvttsd2si(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.\ @@ -40012,12 +40012,12 @@ pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si(a, SAE) + vcvttsd2si(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.\ @@ -40027,12 +40027,12 @@ pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2usi(a, SAE) + vcvttsd2usi(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst. @@ -40041,9 +40041,9 @@ pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d) -> u32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si))] +#[cfg_attr(test, assert_instr(vcvttsd2si))] pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { - vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst. 
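[review note, not part of the diff] On the `_mm_cvtt_round*` variants the `SAE` const parameter only controls exception suppression, since truncation already fixes the rounding direction; `static_assert_sae!` restricts it to `_MM_FROUND_CUR_DIRECTION` or `_MM_FROUND_NO_EXC`. A hedged usage sketch:

    // Truncate the low f64 lane to i32 without raising FP exceptions.
    #[target_feature(enable = "avx512f")]
    unsafe fn sae_sketch(a: __m128d) -> i32 {
        _mm_cvtt_roundsd_si32::<_MM_FROUND_NO_EXC>(a)
    }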
@@ -40052,9 +40052,9 @@ pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi))] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 { - vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst. @@ -41565,6 +41565,16 @@ extern "C" { #[link_name = "llvm.x86.avx512.cvtusi642sd"] fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2; + #[link_name = "llvm.x86.avx512.cvttss2si"] + fn vcvttss2si(a: f32x4, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttss2usi"] + fn vcvttss2usi(a: f32x4, rounding: i32) -> u32; + + #[link_name = "llvm.x86.avx512.cvttsd2si"] + fn vcvttsd2si(a: f64x2, rounding: i32) -> i32; + #[link_name = "llvm.x86.avx512.cvttsd2usi"] + fn vcvttsd2usi(a: f64x2, rounding: i32) -> u32; + #[link_name = "llvm.x86.avx512.vcomi.ss"] fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32; #[link_name = "llvm.x86.avx512.vcomi.sd"] @@ -52766,20 +52776,14 @@ mod tests { unsafe fn test_mm512_castps128_ps512() { let a = _mm_setr_ps(17., 18., 19., 20.); let r = _mm512_castps128_ps512(a); - let e = _mm512_setr_ps( - 17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., - ); - assert_eq_m512(r, e); + assert_eq_m128(_mm512_castps512_ps128(r), a); } #[simd_test(enable = "avx512f")] unsafe fn test_mm512_castps256_ps512() { let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.); let r = _mm512_castps256_ps512(a); - let e = _mm512_setr_ps( - 17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1., - ); - assert_eq_m512(r, e); + assert_eq_m256(_mm512_castps512_ps256(r), a); } #[simd_test(enable = "avx512f")] diff --git a/crates/core_arch/src/x86/gfni.rs b/crates/core_arch/src/x86/gfni.rs index 84dcf4203e..6c7f7a2f17 100644 --- a/crates/core_arch/src/x86/gfni.rs +++ b/crates/core_arch/src/x86/gfni.rs @@ -65,7 +65,7 @@ extern "C" { /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i { @@ -243,7 +243,7 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _ /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(2)] @@ -473,7 +473,7 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8( /// /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512f")] #[unstable(feature = "stdarch_x86_avx512", 
issue = "111137")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(2)] @@ -877,7 +877,7 @@ mod tests { _mm512_loadu_si512(black_box(pointer)) } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -927,7 +927,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -977,7 +977,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -1027,7 +1027,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1104,7 +1104,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1181,7 +1181,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1257,7 +1257,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512f")] unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1345,7 +1345,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx")] unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1433,7 +1433,7 @@ mod tests { } } - #[simd_test(enable = "gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni")] unsafe fn test_mm_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs index 9a8a4844ce..a4602301de 100644 --- a/crates/core_arch/src/x86/sse.rs +++ b/crates/core_arch/src/x86/sse.rs @@ -1866,6 +1866,7 @@ pub const _MM_HINT_ET1: i32 = 6; #[rustc_legacy_const_generics(1)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_prefetch(p: *const i8) { + static_assert_uimm_bits!(STRATEGY, 3); // We use the `llvm.prefetch` intrinsic with `cache type` = 1 (data cache). // `locality` and `rw` are based on our `STRATEGY`. 
prefetch(p, (STRATEGY >> 2) & 1, STRATEGY & 3, 1); diff --git a/crates/core_arch/src/x86/sse2.rs b/crates/core_arch/src/x86/sse2.rs index bdc94de3b7..4c3ac546d1 100644 --- a/crates/core_arch/src/x86/sse2.rs +++ b/crates/core_arch/src/x86/sse2.rs @@ -2444,7 +2444,7 @@ pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { /// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_setzero_pd) #[inline] #[target_feature(enable = "sse2")] -#[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected +#[cfg_attr(test, assert_instr(xorp))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_setzero_pd() -> __m128d { _mm_set_pd(0.0, 0.0) } diff --git a/crates/core_arch/src/x86_64/avx.rs b/crates/core_arch/src/x86_64/avx.rs index 77da2601fe..37f8ef50f8 100644 --- a/crates/core_arch/src/x86_64/avx.rs +++ b/crates/core_arch/src/x86_64/avx.rs @@ -29,11 +29,24 @@ pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m25 transmute(simd_insert!(a.as_i64x4(), INDEX as u32, i)) } +/// Extracts a 64-bit integer from `a`, selected with `INDEX`. +/// +/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64) +#[inline] +#[target_feature(enable = "avx")] +#[rustc_legacy_const_generics(1)] +// This intrinsic has no corresponding instruction. +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 { + static_assert_uimm_bits!(INDEX, 2); + simd_extract!(a.as_i64x4(), INDEX as u32) +} + #[cfg(test)] mod tests { use stdarch_test::simd_test; - use crate::core_arch::x86::*; + use crate::core_arch::arch::x86_64::*; #[simd_test(enable = "avx")] unsafe fn test_mm256_insert_epi64() { @@ -42,4 +55,11 @@ mod tests { let e = _mm256_setr_epi64x(1, 2, 3, 0); assert_eq_m256i(r, e); } + + #[simd_test(enable = "avx")] + unsafe fn test_mm256_extract_epi64() { + let a = _mm256_setr_epi64x(0, 1, 2, 3); + let r = _mm256_extract_epi64::<3>(a); + assert_eq!(r, 3); + } } diff --git a/crates/core_arch/src/x86_64/avx2.rs b/crates/core_arch/src/x86_64/avx2.rs deleted file mode 100644 index fb4ea2a56a..0000000000 --- a/crates/core_arch/src/x86_64/avx2.rs +++ /dev/null @@ -1,48 +0,0 @@ -//! Advanced Vector Extensions 2 (AVX) -//! -//! AVX2 expands most AVX commands to 256-bit wide vector registers and -//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate). -//! -//! The references are: -//! -//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: -//! Instruction Set Reference, A-Z][intel64_ref]. -//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and -//! System Instructions][amd64_ref]. -//! -//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick -//! overview of the instructions available. -//! -//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf -//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf -//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions -//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate -use crate::core_arch::x86::*; -use crate::intrinsics::simd::*; -/// Extracts a 64-bit integer from `a`, selected with `INDEX`.
-/// -/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_extract_epi64) -#[inline] -#[target_feature(enable = "avx2")] -#[rustc_legacy_const_generics(1)] -// This intrinsic has no corresponding instruction. -#[stable(feature = "simd_x86", since = "1.27.0")] -pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 { - static_assert_uimm_bits!(INDEX, 2); - simd_extract!(a.as_i64x4(), INDEX as u32) -} - -#[cfg(test)] -mod tests { - use crate::core_arch::arch::x86_64::*; - use stdarch_test::simd_test; - - #[simd_test(enable = "avx2")] - unsafe fn test_mm256_extract_epi64() { - let a = _mm256_setr_epi64x(0, 1, 2, 3); - let r = _mm256_extract_epi64::<3>(a); - assert_eq!(r, 3); - } -} diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs index aa501bec3d..9953206c88 100644 --- a/crates/core_arch/src/x86_64/avx512f.rs +++ b/crates/core_arch/src/x86_64/avx512f.rs @@ -105,9 +105,9 @@ pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si))] +#[cfg_attr(test, assert_instr(vcvttsd2si))] pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { - vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -116,9 +116,9 @@ pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi))] +#[cfg_attr(test, assert_instr(vcvttsd2usi))] pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { - vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) + vcvttsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst. @@ -127,9 +127,9 @@ pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si))] +#[cfg_attr(test, assert_instr(vcvttss2si))] pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { - vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst. @@ -138,9 +138,9 @@ pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi))] +#[cfg_attr(test, assert_instr(vcvttss2usi))] pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 { - vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) + vcvttss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION) } /// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
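[review note, not part of the diff] The x86_64 file receives the same truncation fix for the 64-bit result widths; these variants matter when the truncated value cannot fit in 32 bits. A short sketch (demo function, not in the diff):

    #[target_feature(enable = "avx512f")]
    unsafe fn cvtt64_sketch() {
        // 4e10 overflows an i32 but truncates exactly into an i64.
        let x = _mm_set_sd(4.0e10);
        assert_eq!(_mm_cvttsd_i64(x), 40_000_000_000);
    }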
@@ -396,12 +396,12 @@ pub unsafe fn _mm_cvt_roundss_u64(a: __m128) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si64(a, SAE) + vcvttsd2si64(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -411,12 +411,12 @@ pub unsafe fn _mm_cvtt_roundsd_si64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2si64(a, SAE) + vcvttsd2si64(a, SAE) } /// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -426,12 +426,12 @@ pub unsafe fn _mm_cvtt_roundsd_i64(a: __m128d) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttsd2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { static_assert_sae!(SAE); let a = a.as_f64x2(); - vcvtsd2usi64(a, SAE) + vcvttsd2usi64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -441,12 +441,12 @@ pub unsafe fn _mm_cvtt_roundsd_u64(a: __m128d) -> u64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si64(a, SAE) + vcvttss2si64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\ @@ -456,12 +456,12 @@ pub unsafe fn _mm_cvtt_roundss_i64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2si, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { static_assert_sae!(SAE); let a = a.as_f32x4(); - vcvtss2si64(a, SAE) + vcvttss2si64(a, SAE) } /// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\ @@ -471,12 +471,12 @@ pub unsafe fn _mm_cvtt_roundss_si64(a: __m128) -> i64 { #[inline] #[target_feature(enable = "avx512f")] #[unstable(feature = "stdarch_x86_avx512", issue = "111137")] -#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))] +#[cfg_attr(test, assert_instr(vcvttss2usi, SAE = 8))] #[rustc_legacy_const_generics(1)] pub unsafe fn _mm_cvtt_roundss_u64(a: __m128) -> u64 { static_assert_sae!(SAE); let a = 
@@ -498,6 +498,15 @@ extern "C" {
     fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
     #[link_name = "llvm.x86.avx512.cvtusi642sd"]
     fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2;
+
+    #[link_name = "llvm.x86.avx512.cvttss2si64"]
+    fn vcvttss2si64(a: f32x4, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.cvttss2usi64"]
+    fn vcvttss2usi64(a: f32x4, rounding: i32) -> u64;
+    #[link_name = "llvm.x86.avx512.cvttsd2si64"]
+    fn vcvttsd2si64(a: f64x2, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.cvttsd2usi64"]
+    fn vcvttsd2usi64(a: f64x2, rounding: i32) -> u64;
 }

 #[cfg(test)]
@@ -9840,16 +9849,14 @@ mod tests {
     unsafe fn test_mm512_castpd128_pd512() {
         let a = _mm_setr_pd(17., 18.);
         let r = _mm512_castpd128_pd512(a);
-        let e = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.);
-        assert_eq_m512d(r, e);
+        assert_eq_m128d(_mm512_castpd512_pd128(r), a);
     }

     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_castpd256_pd512() {
         let a = _mm256_setr_pd(17., 18., 19., 20.);
         let r = _mm512_castpd256_pd512(a);
-        let e = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.);
-        assert_eq_m512d(r, e);
+        assert_eq_m256d(_mm512_castpd512_pd256(r), a);
     }

     #[simd_test(enable = "avx512f")]
@@ -9910,16 +9917,14 @@ mod tests {
     unsafe fn test_mm512_castsi128_si512() {
         let a = _mm_setr_epi64x(17, 18);
         let r = _mm512_castsi128_si512(a);
-        let e = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1);
-        assert_eq_m512i(r, e);
+        assert_eq_m128i(_mm512_castsi512_si128(r), a);
     }

     #[simd_test(enable = "avx512f")]
     unsafe fn test_mm512_castsi256_si512() {
         let a = _mm256_setr_epi64x(17, 18, 19, 20);
         let r = _mm512_castsi256_si512(a);
-        let e = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1);
-        assert_eq_m512i(r, e);
+        assert_eq_m256i(_mm512_castsi512_si256(r), a);
     }

     #[simd_test(enable = "avx512f")]
diff --git a/crates/core_arch/src/x86_64/mod.rs b/crates/core_arch/src/x86_64/mod.rs
index 708dc90823..ff46373d90 100644
--- a/crates/core_arch/src/x86_64/mod.rs
+++ b/crates/core_arch/src/x86_64/mod.rs
@@ -42,10 +42,6 @@ mod bmi2;
 #[stable(feature = "simd_x86", since = "1.27.0")]
 pub use self::bmi2::*;

-mod avx2;
-#[stable(feature = "simd_x86", since = "1.27.0")]
-pub use self::avx2::*;
-
 mod avx512f;
 #[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
 pub use self::avx512f::*;
diff --git a/crates/stdarch-verify/src/lib.rs b/crates/stdarch-verify/src/lib.rs
index 1eb939abcd..ff31c31c89 100644
--- a/crates/stdarch-verify/src/lib.rs
+++ b/crates/stdarch-verify/src/lib.rs
@@ -196,7 +196,6 @@ fn to_type(t: &syn::Type) -> proc_macro2::TokenStream {
         "_MM_MANTISSA_NORM_ENUM" => quote! { &MM_MANTISSA_NORM_ENUM },
         "_MM_MANTISSA_SIGN_ENUM" => quote! { &MM_MANTISSA_SIGN_ENUM },
         "_MM_PERM_ENUM" => quote! { &MM_PERM_ENUM },
-        "__m64" => quote! { &M64 },
         "bool" => quote! { &BOOL },
         "f32" => quote! { &F32 },
         "f64" => quote! { &F64 },
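The cast tests rewritten in the avx512f hunks above stop asserting concrete values for the upper lanes, which _mm512_castpd128_pd512 and its siblings leave undefined; instead they round-trip through the narrowing cast and compare only the defined lanes. The same discipline in user code might look like this sketch (illustrative only, same nightly assumptions; `lower_lanes_survive_cast` is a hypothetical helper):

#![feature(stdarch_x86_avx512, avx512_target_feature)]
use std::arch::x86_64::*;

#[target_feature(enable = "avx512f")]
unsafe fn lower_lanes_survive_cast(a: __m128d) -> bool {
    let wide = _mm512_castpd128_pd512(a); // upper 384 bits are undefined
    let back = _mm512_castpd512_pd128(wide); // recover the defined 128 bits
    // _mm_movemask_pd yields one bit per f64 lane; 0b11 means both lanes match.
    _mm_movemask_pd(_mm_cmpeq_pd(back, a)) == 0b11
}

fn main() {
    // Assumes AVX-512F is available on the running CPU.
    assert!(unsafe { lower_lanes_survive_cast(_mm_setr_pd(17., 18.)) });
}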
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index c5773a0984..250b0cf720 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -11,7 +11,7 @@
     clippy::useless_format
 )]

-use std::collections::{BTreeMap, HashMap};
+use std::collections::{BTreeMap, HashMap, HashSet};
 use std::fs::File;
 use std::io;
 use std::io::{BufWriter, Write};
@@ -46,7 +46,6 @@ static U64: Type = Type::PrimUnsigned(64);
 static U128: Type = Type::PrimUnsigned(128);
 static ORDERING: Type = Type::Ordering;

-static M64: Type = Type::M64;
 static M128: Type = Type::M128;
 static M128BH: Type = Type::M128BH;
 static M128I: Type = Type::M128I;
@@ -79,7 +78,6 @@ enum Type {
     PrimUnsigned(u8),
     MutPtr(&'static Type),
     ConstPtr(&'static Type),
-    M64,
     M128,
     M128BH,
     M128D,
@@ -182,17 +180,12 @@ fn verify_all_signatures() {
         if !rust.has_test {
             // FIXME: this list should be almost empty
             let skip = [
+                // EFLAGS
                 "__readeflags",
                 "__readeflags",
                 "__writeeflags",
                 "__writeeflags",
-                "_mm_comige_ss",
-                "_mm_cvt_ss2si",
-                "_mm_cvtt_ss2si",
-                "_mm_cvt_si2ss",
-                "_mm_set_ps1",
-                "_mm_load_ps1",
-                "_mm_store_ps1",
+                // MXCSR - deprecated
                 "_mm_getcsr",
                 "_mm_setcsr",
                 "_MM_GET_EXCEPTION_MASK",
@@ -203,26 +196,11 @@ fn verify_all_signatures() {
                 "_MM_SET_EXCEPTION_STATE",
                 "_MM_SET_FLUSH_ZERO_MODE",
                 "_MM_SET_ROUNDING_MODE",
-                "_mm_prefetch",
-                "_mm_undefined_ps",
-                "_m_pmaxsw",
-                "_m_pmaxub",
-                "_m_pminsw",
-                "_m_pminub",
-                "_m_pavgb",
-                "_m_pavgw",
-                "_m_psadbw",
-                "_mm_cvt_pi2ps",
-                "_m_maskmovq",
-                "_m_pextrw",
-                "_m_pinsrw",
-                "_m_pmovmskb",
-                "_m_pshufw",
-                "_mm_cvtt_ps2pi",
-                "_mm_cvt_ps2pi",
+                // CPUID
                 "__cpuid_count",
                 "__cpuid",
                 "__get_cpuid_max",
+                // Privileged
                 "_xsave",
                 "_xrstor",
                 "_xsetbv",
@@ -231,59 +209,48 @@ fn verify_all_signatures() {
                 "_xsavec",
                 "_xsaves",
                 "_xrstors",
-                "_mm_bslli_si128",
-                "_mm_bsrli_si128",
-                "_mm_undefined_pd",
-                "_mm_undefined_si128",
-                "_mm_cvtps_ph",
-                "_mm256_cvtps_ph",
+                "_xsave64",
+                "_xrstor64",
+                "_xsaveopt64",
+                "_xsavec64",
+                "_xsaves64",
+                "_xrstors64",
+                "_fxsave",
+                "_fxrstor",
+                "_fxsave64",
+                "_fxrstor64",
+                // TSC
                 "_rdtsc",
                 "__rdtscp",
-                "_mm256_castps128_ps256",
-                "_mm256_castpd128_pd256",
-                "_mm256_castsi128_si256",
-                "_mm256_undefined_ps",
-                "_mm256_undefined_pd",
-                "_mm256_undefined_si256",
-                "_bextr2_u32",
-                "_mm_tzcnt_32",
-                "_m_paddb",
-                "_m_paddw",
-                "_m_paddd",
-                "_m_paddsb",
-                "_m_paddsw",
-                "_m_paddusb",
-                "_m_paddusw",
-                "_m_psubb",
-                "_m_psubw",
-                "_m_psubd",
-                "_m_psubsb",
-                "_m_psubsw",
-                "_m_psubusb",
-                "_m_psubusw",
-                "_mm_set_pi16",
-                "_mm_set_pi32",
-                "_mm_set_pi8",
-                "_mm_set1_pi16",
-                "_mm_set1_pi32",
-                "_mm_set1_pi8",
-                "_mm_setr_pi16",
-                "_mm_setr_pi32",
-                "_mm_setr_pi8",
-                "_mm_min_epi8",
-                "_mm_min_epi32",
+                // TBM
+                "_t1mskc_u64",
+                // RTM
                 "_xbegin",
                 "_xend",
+                // RDRAND
                 "_rdrand16_step",
                 "_rdrand32_step",
+                "_rdrand64_step",
                 "_rdseed16_step",
                 "_rdseed32_step",
-                "_fxsave",
-                "_fxrstor",
-                "_t1mskc_u64",
+                "_rdseed64_step",
+                // Prefetch
+                "_mm_prefetch",
+                // CMPXCHG
+                "cmpxchg16b",
+                // Undefined
+                "_mm_undefined_ps",
+                "_mm_undefined_pd",
+                "_mm_undefined_si128",
+                "_mm256_undefined_ps",
+                "_mm256_undefined_pd",
+                "_mm256_undefined_si256",
+                "_mm512_undefined_ps",
+                "_mm512_undefined_pd",
+                "_mm512_undefined_epi32",
+                "_mm512_undefined",
+                // Has doc-tests instead
                 "_mm256_shuffle_epi32",
-                "_mm256_bslli_epi128",
-                "_mm256_bsrli_epi128",
                 "_mm256_unpackhi_epi8",
                 "_mm256_unpacklo_epi8",
                 "_mm256_unpackhi_epi16",
@@ -292,26 +259,31 @@
"_mm256_unpacklo_epi32", "_mm256_unpackhi_epi64", "_mm256_unpacklo_epi64", - "_xsave64", - "_xrstor64", - "_xsaveopt64", - "_xsavec64", - "_xsaves64", - "_xrstors64", + // Has tests with different name + "_mm_min_epi8", + "_mm_min_epi32", + // Needs `f16` to test + "_mm_cvtps_ph", + "_mm256_cvtps_ph", + // Aliases + "_mm_comige_ss", + "_mm_cvt_ss2si", + "_mm_cvtt_ss2si", + "_mm_cvt_si2ss", + "_mm_set_ps1", + "_mm_load_ps1", + "_mm_store_ps1", + "_mm_bslli_si128", + "_mm_bsrli_si128", + "_bextr2_u32", + "_mm_tzcnt_32", + "_mm256_bslli_epi128", + "_mm256_bsrli_epi128", "_mm_cvtsi64x_si128", "_mm_cvtsi128_si64x", "_mm_cvtsi64x_sd", - "cmpxchg16b", - "_rdrand64_step", - "_rdseed64_step", "_bextr2_u64", "_mm_tzcnt_64", - "_fxsave64", - "_fxrstor64", - "_mm512_undefined_ps", - "_mm512_undefined_pd", - "_mm512_undefined_epi32", - "_mm512_undefined", ]; if !skip.contains(&rust.name) { println!( @@ -465,6 +437,14 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { } } + let rust_features: HashSet = match rust.target_feature { + Some(features) => features + .split(',') + .map(|feature| feature.to_string()) + .collect(), + None => HashSet::new(), + }; + for cpuid in &intel.cpuid { // The pause intrinsic is in the SSE2 module, but it is backwards // compatible with CPUs without SSE2, and it therefore does not need the @@ -539,15 +519,11 @@ fn matches(rust: &Function, intel: &Intrinsic) -> Result<(), String> { }; let fixed_cpuid = fixup_cpuid(cpuid); - let rust_feature = rust - .target_feature - .unwrap_or_else(|| panic!("no target feature listed for {}", rust.name)); - - if !rust_feature.contains(&fixed_cpuid) { + if !rust_features.contains(&fixed_cpuid) { bail!( - "intel cpuid `{}` not in `{}` for {}", + "intel cpuid `{}` not in `{:?}` for {}", fixed_cpuid, - rust_feature, + rust_features, rust.name ); } @@ -750,7 +726,6 @@ fn equate( ) => {} (&Type::PrimUnsigned(64), "unsigned __int64") => {} - (&Type::M64, "__m64") => {} (&Type::M128, "__m128") => {} (&Type::M128BH, "__m128bh") => {} (&Type::M128I, "__m128i") => {} @@ -784,7 +759,6 @@ fn equate( (&Type::MutPtr(&Type::MMASK64), "__mmask64*") => {} (&Type::MutPtr(&Type::MMASK16), "__mmask16*") => {} - (&Type::MutPtr(&Type::M64), "__m64*") => {} (&Type::MutPtr(&Type::M128), "__m128*") => {} (&Type::MutPtr(&Type::M128BH), "__m128bh*") => {} (&Type::MutPtr(&Type::M128I), "__m128i*") => {} @@ -808,7 +782,6 @@ fn equate( (&Type::ConstPtr(&Type::PrimUnsigned(32)), "unsigned int const*") => {} (&Type::ConstPtr(&Type::PrimUnsigned(64)), "unsigned __int64 const*") => {} - (&Type::ConstPtr(&Type::M64), "__m64 const*") => {} (&Type::ConstPtr(&Type::M128), "__m128 const*") => {} (&Type::ConstPtr(&Type::M128BH), "__m128bh const*") => {} (&Type::ConstPtr(&Type::M128I), "__m128i const*") => {}