diff --git a/coresimd/src/x86/i686/mmx.rs b/coresimd/src/x86/i686/mmx.rs index df9cb98336312..476ad957e6dd9 100644 --- a/coresimd/src/x86/i686/mmx.rs +++ b/coresimd/src/x86/i686/mmx.rs @@ -48,12 +48,73 @@ pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 { mem::transmute(packssdw(mem::transmute(a), mem::transmute(b))) } +/// Compares the 8-bit integer elements of two 64-bit integer vectors of +/// [8 x i8] to determine if the element of the first vector is greater than +/// the corresponding element of the second vector. +/// +/// The comparison yields 0 for false, 0xFF (-1 as `i8`) for true. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(pcmpgtb))] +pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 { + mem::transmute(pcmpgtb(mem::transmute(a), mem::transmute(b))) +} + +/// Compares the 16-bit integer elements of two 64-bit integer vectors of +/// [4 x i16] to determine if the element of the first vector is greater than +/// the corresponding element of the second vector. +/// +/// The comparison yields 0 for false, 0xFFFF (-1 as `i16`) for true. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(pcmpgtw))] +pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 { + mem::transmute(pcmpgtw(mem::transmute(a), mem::transmute(b))) +} + +/// Unpacks the upper 32 bits from two 64-bit integer vectors of +/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] laid out as [a2, b2, a3, b3]. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected +pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 { + mem::transmute(punpckhwd(mem::transmute(a), mem::transmute(b))) +} + +/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] +/// and interleaves them into a 64-bit integer vector of [8 x i8] laid out as [a0, b0, a1, b1, a2, b2, a3, b3]. 
+#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpcklbw))] +pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 { + mem::transmute(punpcklbw(mem::transmute(a), mem::transmute(b))) +} + +/// Unpacks the lower 32 bits from two 64-bit integer vectors of +/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16] laid out as [a0, b0, a1, b1]. +#[inline(always)] +#[target_feature = "+mmx"] +#[cfg_attr(test, assert_instr(punpcklwd))] +pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 { + mem::transmute(punpcklwd(mem::transmute(a), mem::transmute(b))) +} + #[allow(improper_ctypes)] extern "C" { #[link_name = "llvm.x86.mmx.packsswb"] fn packsswb(a: __m64, b: __m64) -> __m64; #[link_name = "llvm.x86.mmx.packssdw"] fn packssdw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pcmpgt.b"] + fn pcmpgtb(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.pcmpgt.w"] + fn pcmpgtw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpckhwd"] + fn punpckhwd(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpcklbw"] + fn punpcklbw(a: __m64, b: __m64) -> __m64; + #[link_name = "llvm.x86.mmx.punpcklwd"] + fn punpcklwd(a: __m64, b: __m64) -> __m64; } #[cfg(test)] @@ -83,4 +144,44 @@ mod tests { let r = i16x4::new(-1, 2, -5, 6); assert_eq!(r, mmx::_mm_packs_pi32(a, b)); } + + #[simd_test = "mmx"] + unsafe fn _mm_cmpgt_pi8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1); + let r = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1); + assert_eq!(r, mmx::_mm_cmpgt_pi8(a, b)); + } + + #[simd_test = "mmx"] + unsafe fn _mm_cmpgt_pi16() { + let a = i16x4::new(0, 1, 2, 3); + let b = i16x4::new(4, 3, 2, 1); + let r = i16x4::new(0, 0, 0, -1); + assert_eq!(r, mmx::_mm_cmpgt_pi16(a, b)); + } + + #[simd_test = "mmx"] + unsafe fn _mm_unpackhi_pi16() { + let a = i16x4::new(0, 1, 2, 3); + let b = i16x4::new(4, 5, 6, 7); + let r = i16x4::new(2, 6, 3, 7); + assert_eq!(r, mmx::_mm_unpackhi_pi16(a, 
b)); + } + + #[simd_test = "mmx"] + unsafe fn _mm_unpacklo_pi8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11); + assert_eq!(r, mmx::_mm_unpacklo_pi8(a, b)); + } + + #[simd_test = "mmx"] + unsafe fn _mm_unpacklo_pi16() { + let a = i16x4::new(0, 1, 2, 3); + let b = i16x4::new(4, 5, 6, 7); + let r = i16x4::new(0, 4, 1, 5); + assert_eq!(r, mmx::_mm_unpacklo_pi16(a, b)); + } } diff --git a/coresimd/src/x86/i686/sse.rs b/coresimd/src/x86/i686/sse.rs index 7ddb6e7fec777..92e3e110bc1cd 100644 --- a/coresimd/src/x86/i686/sse.rs +++ b/coresimd/src/x86/i686/sse.rs @@ -221,6 +221,56 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 { _mm_cvtpi32_ps(a, b) } +/// Converts a 64-bit vector of [4 x i16] into a 128-bit vector of [4 x +/// float]. Each lane is first widened to 32 bits using the `0 > lane` mask as its upper half. +#[inline(always)] +#[target_feature = "+sse"] +pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let b = mmx::_mm_cmpgt_pi16(mem::transmute(b), a); + let c = mmx::_mm_unpackhi_pi16(a, b); + let r = i586::_mm_setzero_ps(); + let r = cvtpi2ps(r, mem::transmute(c)); + let r = i586::_mm_movelh_ps(r, r); + let c = mmx::_mm_unpacklo_pi16(a, b); + cvtpi2ps(r, mem::transmute(c)) +} + +/// Converts a 64-bit vector of 16-bit unsigned integer values into a +/// 128-bit vector of [4 x float]. Each lane is first widened to 32 bits with a zero upper half. +#[inline(always)] +#[target_feature = "+sse"] +pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 { + let b = mem::transmute(mmx::_mm_setzero_si64()); + let c = mmx::_mm_unpackhi_pi16(a.as_i16x4(), b); + let r = i586::_mm_setzero_ps(); + let r = cvtpi2ps(r, mem::transmute(c)); + let r = i586::_mm_movelh_ps(r, r); + let c = mmx::_mm_unpacklo_pi16(a.as_i16x4(), b); + cvtpi2ps(r, mem::transmute(c)) +} + +/// Converts the lower four 8-bit values from a 64-bit vector of [8 x i8] +/// into a 128-bit vector of [4 x float]. 
+#[inline(always)] +#[target_feature = "+sse"] +pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let b = mmx::_mm_cmpgt_pi8(mem::transmute(b), a); + let b = mmx::_mm_unpacklo_pi8(a, b); + _mm_cvtpi16_ps(mem::transmute(b)) +} + +/// Converts the lower four unsigned 8-bit integer values from a 64-bit +/// vector of [8 x u8] into a 128-bit vector of [4 x float]. +#[inline(always)] +#[target_feature = "+sse"] +pub unsafe fn _mm_cvtpu8_ps(a: u8x8) -> f32x4 { + let b = mmx::_mm_setzero_si64(); + let b = mmx::_mm_unpacklo_pi8(a.as_i8x8(), mem::transmute(b)); + _mm_cvtpi16_ps(mem::transmute(b)) +} + /// Converts the two 32-bit signed integer values from each 64-bit vector /// operand of [2 x i32] into a 128-bit vector of [4 x float]. #[inline(always)] @@ -507,6 +557,38 @@ mod tests { assert_eq!(r, expected); } + #[simd_test = "sse"] + unsafe fn _mm_cvtpi16_ps() { + let a = i16x4::new(1, 2, 3, 4); + let expected = f32x4::new(1., 2., 3., 4.); + let r = sse::_mm_cvtpi16_ps(a); + assert_eq!(r, expected); + } + + #[simd_test = "sse"] + unsafe fn _mm_cvtpu16_ps() { + let a = u16x4::new(1, 2, 3, 4); + let expected = f32x4::new(1., 2., 3., 4.); + let r = sse::_mm_cvtpu16_ps(a); + assert_eq!(r, expected); + } + + #[simd_test = "sse"] + unsafe fn _mm_cvtpi8_ps() { + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let expected = f32x4::new(1., 2., 3., 4.); + let r = sse::_mm_cvtpi8_ps(a); + assert_eq!(r, expected); + } + + #[simd_test = "sse"] + unsafe fn _mm_cvtpu8_ps() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let expected = f32x4::new(1., 2., 3., 4.); + let r = sse::_mm_cvtpu8_ps(a); + assert_eq!(r, expected); + } + #[simd_test = "sse"] unsafe fn _mm_cvtpi32x2_ps() { let a = i32x2::new(1, 2);