added f32 and f64 unaligned stores and loads from avx512f set #873

Merged 6 commits on Jul 11, 2020
Changes from 3 commits
6 changes: 2 additions & 4 deletions crates/core_arch/src/x86/avx.rs
@@ -2956,8 +2956,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_ps() -> __m256 {
-    // FIXME: this function should return MaybeUninit<__m256>
-    mem::MaybeUninit::<__m256>::uninit().assume_init()
+    _mm256_set1_ps(0.0)
}

/// Returns vector of type `__m256d` with undefined elements.
@@ -2968,8 +2967,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_pd() -> __m256d {
-    // FIXME: this function should return MaybeUninit<__m256d>
-    mem::MaybeUninit::<__m256d>::uninit().assume_init()
+    _mm256_set1_pd(0.0)
}

/// Returns vector of type __m256i with undefined elements.
118 changes: 118 additions & 0 deletions crates/core_arch/src/x86/avx512f.rs
@@ -1,6 +1,7 @@
use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
mem::{self, transmute},
ptr,
};

#[cfg(test)]
@@ -1633,6 +1634,83 @@ pub unsafe fn _mm512_mask_cmp_epi64_mask(
transmute(r)
}

/// Returns vector of type `__m512d` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_pd)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_pd() -> __m512d {
_mm512_set1_pd(0.0)
Review comment (Member): This is a zero float. I think it should be literal zero bytes. Not sure though.

Reply (Member): A zero float happens to be encoded with all bits zeroed.

}
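
As a standalone sanity check of the reply above (not part of this diff): 0.0 really is the all-zero bit pattern, so _mm512_set1_pd(0.0) produces the same bytes as a zeroed register.

// Sanity check, not part of the PR: 0.0 encodes to all-zero bits at both widths.
fn zero_float_is_all_zero_bits() {
    assert_eq!(0.0f32.to_bits(), 0u32);
    assert_eq!(0.0f64.to_bits(), 0u64);
}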

/// Returns vector of type `__m512` with undefined elements.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_undefined_ps)
#[inline]
#[target_feature(enable = "avx512f")]
// This intrinsic has no corresponding instruction.
pub unsafe fn _mm512_undefined_ps() -> __m512 {
_mm512_set1_ps(0.0)
}

/// Loads 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
ptr::read_unaligned(mem_addr as *const __m512d)
}

/// Stores 512-bits (composed of 8 packed double-precision (64-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_pd)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovupd))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
ptr::copy_nonoverlapping(
&a as *const __m512d as *const u8,
mem_addr as *mut u8,
mem::size_of::<__m512d>(),
);
Review comment (Member): You can use ptr::write_unaligned here just like the loads.

Reply (Contributor, author): thanks

}
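
A minimal sketch of the reviewer's suggestion (not part of this diff, and not necessarily the exact code that landed in the later commits): the store can mirror the loads by using an unaligned pointer write instead of a byte-wise copy. The _sketch suffix marks the function as illustrative.

// Sketch only: the same store expressed with ptr::write_unaligned, mirroring how
// _mm512_loadu_pd uses ptr::read_unaligned above. Semantically equivalent to the
// copy_nonoverlapping version in the diff.
pub unsafe fn _mm512_storeu_pd_sketch(mem_addr: *mut f64, a: __m512d) {
    ptr::write_unaligned(mem_addr as *mut __m512d, a);
}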

/// Loads 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from memory into result.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_loadu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
ptr::read_unaligned(mem_addr as *const __m512)
}

/// Stores 512-bits (composed of 16 packed single-precision (32-bit)
/// floating-point elements) from `a` into memory.
/// `mem_addr` does not need to be aligned on any particular boundary.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_storeu_ps)
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
ptr::copy_nonoverlapping(
&a as *const __m512 as *const u8,
mem_addr as *mut u8,
mem::size_of::<__m512>(),
);
}

/// Equal
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
/// Less-than
@@ -1702,6 +1780,8 @@ mod tests {
use stdarch_test::simd_test;

use crate::core_arch::x86::*;
use crate::core_arch::x86_64::_mm512_setr_pd;
use crate::hint::black_box;

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
@@ -2326,4 +2406,42 @@
unsafe fn test_mm512_setzero_ps() {
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_pd() {
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
let p = a.as_ptr();
let r = _mm512_loadu_pd(black_box(p));
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m512d(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_pd() {
let a = _mm512_set1_pd(9.);
let mut r = _mm512_undefined_pd();
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
assert_eq_m512d(r, a);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_ps() {
let a = &[
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
];
let p = a.as_ptr();
let r = _mm512_loadu_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}

#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}
}
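
For orientation, a hedged usage sketch of the new unaligned load/store pair outside the test harness (not part of this diff; it assumes a toolchain where the AVX-512 intrinsics are available, since they were still unstable when this PR was opened; the function name is illustrative):

// Round-trips 16 f32 values through the new intrinsics when AVX-512F is detected
// at runtime, and falls back to a plain copy otherwise.
#[cfg(target_arch = "x86_64")]
fn copy_16_f32(src: &[f32; 16], dst: &mut [f32; 16]) {
    if is_x86_feature_detected!("avx512f") {
        unsafe {
            use std::arch::x86_64::{_mm512_loadu_ps, _mm512_storeu_ps};
            let v = _mm512_loadu_ps(src.as_ptr());
            _mm512_storeu_ps(dst.as_mut_ptr(), v);
        }
    } else {
        *dst = *src;
    }
}
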
3 changes: 1 addition & 2 deletions crates/core_arch/src/x86/sse.rs
@@ -1865,8 +1865,7 @@ pub unsafe fn _mm_prefetch(p: *const i8, strategy: i32) {
#[target_feature(enable = "sse")]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_undefined_ps() -> __m128 {
-    // FIXME: this function should return MaybeUninit<__m128>
-    mem::MaybeUninit::<__m128>::uninit().assume_init()
+    _mm_set1_ps(0.0)
}

/// Transpose the 4x4 matrix formed by 4 rows of __m128 in place.
6 changes: 6 additions & 0 deletions crates/stdarch-verify/tests/x86-intel.rs
@@ -282,6 +282,8 @@ fn verify_all_signatures() {
"_mm_tzcnt_64",
"_fxsave64",
"_fxrstor64",
"_mm512_undefined_ps",
"_mm512_undefined_pd",
];
if !skip.contains(&rust.name) {
println!(
@@ -625,6 +627,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),

(&Type::MutPtr(&Type::PrimFloat(32)), "float*") => {}
(&Type::MutPtr(&Type::PrimFloat(64)), "double*") => {}
(&Type::MutPtr(&Type::PrimFloat(32)), "void*") => {}
(&Type::MutPtr(&Type::PrimFloat(64)), "void*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "int*") => {}
(&Type::MutPtr(&Type::PrimSigned(32)), "__int32*") => {}
(&Type::MutPtr(&Type::PrimSigned(64)), "__int64*") => {}
@@ -646,6 +650,8 @@ fn equate(t: &Type, intel: &str, intrinsic: &str, is_const: bool) -> Result<(),

(&Type::ConstPtr(&Type::PrimFloat(32)), "float const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(64)), "double const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(32)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimFloat(64)), "void const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "int const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(32)), "__int32 const*") => {}
(&Type::ConstPtr(&Type::PrimSigned(64)), "__int64 const*") => {}