From 3c6d9d49a2d9aa9ae7b908d8253983d218ab59b3 Mon Sep 17 00:00:00 2001
From: TheIronBorn <>
Date: Fri, 22 Jun 2018 10:59:43 -0700
Subject: [PATCH] swap bytes and shuffle intrinsics

---
 src/distributions/math_helpers.rs | 125 ++++++++++++++++++++++++++++++
 src/distributions/mod.rs          |   1 +
 2 files changed, 126 insertions(+)
 create mode 100644 src/distributions/math_helpers.rs

diff --git a/src/distributions/math_helpers.rs b/src/distributions/math_helpers.rs
new file mode 100644
index 00000000000..e486854fb30
--- /dev/null
+++ b/src/distributions/math_helpers.rs
@@ -0,0 +1,125 @@
+//! Math helper functions
+
+#[cfg(feature="simd_support")]
+use stdsimd::simd::*;
+
+// Until portable shuffles land in stdsimd, we expose and use the shuffle intrinsics directly.
+// Each intrinsic is generic over the input vector type `T` and the output vector type `U`;
+// `indices` must be a constant array.
+#[cfg(feature="simd_support")]
+extern "platform-intrinsic" {
+    pub(crate) fn simd_shuffle2<T, U>(a: T, b: T, indices: [u32; 2]) -> U;
+    pub(crate) fn simd_shuffle4<T, U>(a: T, b: T, indices: [u32; 4]) -> U;
+    pub(crate) fn simd_shuffle8<T, U>(a: T, b: T, indices: [u32; 8]) -> U;
+    pub(crate) fn simd_shuffle16<T, U>(a: T, b: T, indices: [u32; 16]) -> U;
+    pub(crate) fn simd_shuffle32<T, U>(a: T, b: T, indices: [u32; 32]) -> U;
+    pub(crate) fn simd_shuffle64<T, U>(a: T, b: T, indices: [u32; 64]) -> U;
+}
+
+/// Implement byte swapping for SIMD vectors
+#[cfg(feature="simd_support")]
+pub trait SwapBytes {
+    /// `swap_bytes` for a vector (horizontally)
+    ///
+    /// The byte order of the whole vector is reversed, not that of each lane individually.
+    fn swap_bytes(self) -> Self;
+}
+
+// `simd_shuffleX` requires constant indices, making this a small pain to implement
+#[cfg(feature="simd_support")]
+macro_rules! impl_swap_bytes {
+    // single impl: view `$ty` as the byte vector `$vec8`, shuffle its bytes, convert back
+    ($ty:ident, $vec8:ident, $shuf:ident, $indices:expr) => (
+        impl SwapBytes for $ty {
+            fn swap_bytes(self) -> Self {
+                let vec8 = $vec8::from_bits(self);
+                // SAFETY: invocations pass one in-range index per byte lane of `$vec8`.
+                let shuffled: $vec8 = unsafe { $shuf(vec8, vec8, $indices) };
+                $ty::from_bits(shuffled)
+            }
+        }
+    );
+
+    // bulk impl for a shuffle intrinsic/vector width
+    ($vec8:ident, $shuf:ident, $indices:expr, $($ty:ident,)+) => ($(
+        impl_swap_bytes! { $ty, $vec8, $shuf, $indices }
+    )+);
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x2,
+    simd_shuffle2,
+    [1, 0],
+    u8x2,
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x4,
+    simd_shuffle4,
+    [3, 2, 1, 0],
+    u8x4,
+    u16x2,
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x8,
+    simd_shuffle8,
+    [7, 6, 5, 4, 3, 2, 1, 0],
+    u8x8,
+    u16x4,
+    u32x2,
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x16,
+    simd_shuffle16,
+    [15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
+    u8x16,
+    u16x8,
+    u32x4,
+    u64x2,
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x32,
+    simd_shuffle32,
+    [31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
+    u8x32,
+    u16x16,
+    u32x8,
+    u64x4,
+}
+
+#[cfg(feature="simd_support")]
+impl_swap_bytes! {
+    u8x64,
+    simd_shuffle64,
+    [63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
+    u8x64,
+    u16x32,
+    u32x16,
+    u64x8,
+}
+
+#[cfg(all(test, feature="simd_support"))]
+mod tests {
+    use super::*;
+    use core::mem;
+
+    #[test]
+    fn swap_bytes_128() {
+        let x: u128 = 0x2d99787926d46932a4c1f32680f70c55;
+        let expected = x.swap_bytes();
+
+        // SAFETY: `u128` and `u8x16` are both 16 bytes of plain integer data.
+        let vec: u8x16 = unsafe { mem::transmute(x) };
+        let actual: u128 = unsafe { mem::transmute(vec.swap_bytes()) };
+
+        assert_eq!(expected, actual);
+    }
+}
diff --git a/src/distributions/mod.rs b/src/distributions/mod.rs
index bba230b7459..041f218724a 100644
--- a/src/distributions/mod.rs
+++ b/src/distributions/mod.rs
@@ -208,6 +208,7 @@ mod float;
 mod integer;
 #[cfg(feature="std")]
 mod log_gamma;
+mod math_helpers;
 mod other;
 #[cfg(feature="std")]
 mod ziggurat_tables;