diff --git a/crates/core_arch/src/x86/sse.rs b/crates/core_arch/src/x86/sse.rs
index 3de09ca964..551da7652b 100644
--- a/crates/core_arch/src/x86/sse.rs
+++ b/crates/core_arch/src/x86/sse.rs
@@ -1251,6 +1251,21 @@ pub unsafe fn _mm_loadr_ps(p: *const f32) -> __m128 {
     simd_shuffle4(a, a, [3, 2, 1, 0])
 }
 
+/// Loads unaligned 64-bits of integer data from memory into the lower 64 bits
+/// of a new vector. The upper 64 bits of the result are set to zero.
+///
+/// `mem_addr` does not need to be aligned on any particular boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(all(test, not(target_arch = "x86")), assert_instr(movq))]
+#[stable(feature = "simd_x86_mm_loadu_si64", since = "1.46.0")]
+pub unsafe fn _mm_loadu_si64(mem_addr: *const u8) -> __m128i {
+    // Lane 0 is the low half: it receives the loaded value, lane 1 is zeroed.
+    transmute(i64x2(ptr::read_unaligned(mem_addr as *const i64), 0))
+}
+
 /// Stores the upper half of `a` (64 bits) into memory.
 ///
 /// This intrinsic corresponds to the `MOVHPS` instruction. The compiler may
@@ -3658,6 +3673,14 @@ mod tests {
         assert_eq_m128(r, e);
     }
 
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_loadu_si64() {
+        let a = _mm_setr_epi64x(5, 6);
+        let r = _mm_loadu_si64(&a as *const _ as *const _);
+        // Only the low 64 bits of `a` are read; the high lane must be zero.
+        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
+    }
+
     #[simd_test(enable = "sse")]
     unsafe fn test_mm_storeh_pi() {
         let mut vals = [0.0f32; 8];
diff --git a/crates/stdarch-test/src/lib.rs b/crates/stdarch-test/src/lib.rs
index fa73a7bba6..38025b1701 100644
--- a/crates/stdarch-test/src/lib.rs
+++ b/crates/stdarch-test/src/lib.rs
@@ -3,7 +3,6 @@
 //! This basically just disassembles the current executable and then parses the
 //! output once globally and then provides the `assert` function which makes
 //! assertions about the disassembly of a function.
-#![feature(const_transmute)]
 #![feature(vec_leak)]
 #![allow(clippy::missing_docs_in_private_items, clippy::print_stdout)]
 