From 27a847756f5b990aca905838fcda5433f98f69c7 Mon Sep 17 00:00:00 2001 From: Simon Ellmann Date: Tue, 20 Aug 2024 11:28:30 +0200 Subject: [PATCH] Add sfence after non-temporal stores on x86 Non-temporal (streaming) stores are weakly ordered, so an sfence must be issued after them before the written memory is accessed again, see: https://doc.rust-lang.org/core/arch/x86/fn._mm_sfence.html Even more interesting, the discussion on NT stores in Rust: https://github.com/rust-lang/rust/issues/114582 And on broken NT stores in LLVM: https://github.com/llvm/llvm-project/issues/64521 Oh boy ... --- benches/memcpy.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/benches/memcpy.rs b/benches/memcpy.rs index 8c5a26e..e521481 100644 --- a/benches/memcpy.rs +++ b/benches/memcpy.rs @@ -39,6 +39,7 @@ unsafe fn memcpy_avx(mut src: *const u8, mut dst: *mut u8, count: usize) { _mm_prefetch::<_MM_HINT_T2>(src as *const i8); dst = dst.add(32); } + _mm_sfence(); } #[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse", target_feature = "avx512f"))] @@ -51,6 +52,7 @@ unsafe fn memcpy_avx512(mut src: *const u8, mut dst: *mut u8, count: usize) { _mm_prefetch::<_MM_HINT_T2>(src as *const i8); dst = dst.add(64); } + _mm_sfence(); } #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]