Skip to content

Commit

Permalink
sse: _mm_cvtpi16_ps, _mm_cvtpu16_ps, _mm_cvtpi8_ps, _mm_cvtpu8_ps (ru…
Browse files Browse the repository at this point in the history
…st-lang#255)

* sse: _mm_cvtpi16_ps, _mm_cvtpu16_ps, _mm_cvtpi8_ps, _mm_cvtpu8_ps

And mmx:
_mm_cmpgt_pi8
_mm_cmpgt_pi16
_mm_unpackhi_pi16
_mm_unpacklo_pi8
_mm_unpacklo_pi16

* Fix: literal out of range
  • Loading branch information
gwenn authored and alexcrichton committed Dec 30, 2017
1 parent ab543f1 commit 2d77311
Show file tree
Hide file tree
Showing 2 changed files with 183 additions and 0 deletions.
101 changes: 101 additions & 0 deletions coresimd/src/x86/i686/mmx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,73 @@ pub unsafe fn _mm_packs_pi32(a: i32x2, b: i32x2) -> i16x4 {
mem::transmute(packssdw(mem::transmute(a), mem::transmute(b)))
}

/// Lane-wise signed "greater than" over two 64-bit vectors of [8 x i8].
///
/// Each result lane is 0xFF where the lane of `a` is greater than the
/// matching lane of `b`, and 0 otherwise.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtb))]
pub unsafe fn _mm_cmpgt_pi8(a: i8x8, b: i8x8) -> i8x8 {
    // The intrinsic works on the raw 64-bit representation, so both operands
    // are reinterpreted on the way in and the mask on the way out.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let mask = pcmpgtb(lhs, rhs);
    mem::transmute(mask)
}

/// Lane-wise signed "greater than" over two 64-bit vectors of [4 x i16].
///
/// Each result lane is 0xFFFF where the lane of `a` is greater than the
/// matching lane of `b`, and 0 otherwise.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(pcmpgtw))]
pub unsafe fn _mm_cmpgt_pi16(a: i16x4, b: i16x4) -> i16x4 {
    // The intrinsic works on the raw 64-bit representation, so both operands
    // are reinterpreted on the way in and the mask on the way out.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let mask = pcmpgtw(lhs, rhs);
    mem::transmute(mask)
}

/// Interleaves the upper two 16-bit lanes of two 64-bit vectors of
/// [4 x i16].
///
/// For `a = [a0, a1, a2, a3]` and `b = [b0, b1, b2, b3]` the result is
/// `[a2, b2, a3, b3]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpckhwd))] // FIXME punpcklbw expected
pub unsafe fn _mm_unpackhi_pi16(a: i16x4, b: i16x4) -> i16x4 {
    // Reinterpret to the raw 64-bit form the intrinsic expects, then back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpckhwd(lhs, rhs);
    mem::transmute(interleaved)
}

/// Interleaves the lower four 8-bit lanes of two 64-bit vectors of
/// [8 x i8].
///
/// For `a = [a0, .., a7]` and `b = [b0, .., b7]` the result is
/// `[a0, b0, a1, b1, a2, b2, a3, b3]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklbw))]
pub unsafe fn _mm_unpacklo_pi8(a: i8x8, b: i8x8) -> i8x8 {
    // Reinterpret to the raw 64-bit form the intrinsic expects, then back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpcklbw(lhs, rhs);
    mem::transmute(interleaved)
}

/// Interleaves the lower two 16-bit lanes of two 64-bit vectors of
/// [4 x i16].
///
/// For `a = [a0, a1, a2, a3]` and `b = [b0, b1, b2, b3]` the result is
/// `[a0, b0, a1, b1]`.
#[inline(always)]
#[target_feature = "+mmx"]
#[cfg_attr(test, assert_instr(punpcklwd))]
pub unsafe fn _mm_unpacklo_pi16(a: i16x4, b: i16x4) -> i16x4 {
    // Reinterpret to the raw 64-bit form the intrinsic expects, then back.
    let lhs = mem::transmute(a);
    let rhs = mem::transmute(b);
    let interleaved = punpcklwd(lhs, rhs);
    mem::transmute(interleaved)
}

// Declarations of the LLVM MMX intrinsics that back the wrappers in this
// module. Each function is bound to its intrinsic through `link_name`, and
// every one takes and returns the raw `__m64` value, which is why the public
// wrappers transmute their typed vectors on the way in and out.
// NOTE(review): `improper_ctypes` is presumably silenced because the lint
// does not consider `__m64` FFI-safe — confirm.
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.mmx.packsswb"]
fn packsswb(a: __m64, b: __m64) -> __m64;
// Used by `_mm_packs_pi32`.
#[link_name = "llvm.x86.mmx.packssdw"]
fn packssdw(a: __m64, b: __m64) -> __m64;
// Used by `_mm_cmpgt_pi8`.
#[link_name = "llvm.x86.mmx.pcmpgt.b"]
fn pcmpgtb(a: __m64, b: __m64) -> __m64;
// Used by `_mm_cmpgt_pi16`.
#[link_name = "llvm.x86.mmx.pcmpgt.w"]
fn pcmpgtw(a: __m64, b: __m64) -> __m64;
// Used by `_mm_unpackhi_pi16`.
#[link_name = "llvm.x86.mmx.punpckhwd"]
fn punpckhwd(a: __m64, b: __m64) -> __m64;
// Used by `_mm_unpacklo_pi8`.
#[link_name = "llvm.x86.mmx.punpcklbw"]
fn punpcklbw(a: __m64, b: __m64) -> __m64;
// Used by `_mm_unpacklo_pi16`.
#[link_name = "llvm.x86.mmx.punpcklwd"]
fn punpcklwd(a: __m64, b: __m64) -> __m64;
}

#[cfg(test)]
Expand Down Expand Up @@ -83,4 +144,44 @@ mod tests {
let r = i16x4::new(-1, 2, -5, 6);
assert_eq!(r, mmx::_mm_packs_pi32(a, b));
}

#[simd_test = "mmx"]
unsafe fn _mm_cmpgt_pi8() {
    // Only the last three lanes of `lhs` exceed `rhs` (5 > 3, 6 > 2, 7 > 1);
    // lane 4 is equal and must therefore stay clear.
    let lhs = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    let rhs = i8x8::new(8, 7, 6, 5, 4, 3, 2, 1);
    let want = i8x8::new(0, 0, 0, 0, 0, -1, -1, -1);
    let got = mmx::_mm_cmpgt_pi8(lhs, rhs);
    assert_eq!(want, got);
}

#[simd_test = "mmx"]
unsafe fn _mm_cmpgt_pi16() {
    // Only the final lane of `lhs` exceeds `rhs` (3 > 1); lane 2 is equal
    // and must therefore stay clear.
    let lhs = i16x4::new(0, 1, 2, 3);
    let rhs = i16x4::new(4, 3, 2, 1);
    let want = i16x4::new(0, 0, 0, -1);
    let got = mmx::_mm_cmpgt_pi16(lhs, rhs);
    assert_eq!(want, got);
}

#[simd_test = "mmx"]
unsafe fn _mm_unpackhi_pi16() {
    // Distinct lane values make the interleave order of the upper halves
    // visible in the result.
    let lhs = i16x4::new(0, 1, 2, 3);
    let rhs = i16x4::new(4, 5, 6, 7);
    let want = i16x4::new(2, 6, 3, 7);
    let got = mmx::_mm_unpackhi_pi16(lhs, rhs);
    assert_eq!(want, got);
}

#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi8() {
    // Distinct lane values make the interleave order of the lower halves
    // visible in the result.
    let lhs = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
    let rhs = i8x8::new(8, 9, 10, 11, 12, 13, 14, 15);
    let want = i8x8::new(0, 8, 1, 9, 2, 10, 3, 11);
    let got = mmx::_mm_unpacklo_pi8(lhs, rhs);
    assert_eq!(want, got);
}

#[simd_test = "mmx"]
unsafe fn _mm_unpacklo_pi16() {
    // Distinct lane values make the interleave order of the lower halves
    // visible in the result.
    let lhs = i16x4::new(0, 1, 2, 3);
    let rhs = i16x4::new(4, 5, 6, 7);
    let want = i16x4::new(0, 4, 1, 5);
    let got = mmx::_mm_unpacklo_pi16(lhs, rhs);
    assert_eq!(want, got);
}
}
82 changes: 82 additions & 0 deletions coresimd/src/x86/i686/sse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,56 @@ pub unsafe fn _mm_cvt_pi2ps(a: f32x4, b: i32x2) -> f32x4 {
_mm_cvtpi32_ps(a, b)
}

/// Converts the four signed 16-bit lanes of a 64-bit vector of [4 x i16]
/// into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi16_ps(a: i16x4) -> f32x4 {
    // `0 > lane` holds exactly for the negative lanes, so the comparison
    // leaves 0xFFFF in those lanes and 0 in the others.
    let sign_mask =
        mmx::_mm_cmpgt_pi16(mem::transmute(mmx::_mm_setzero_si64()), a);

    // Interleaving `a` with the mask pairs every 16-bit lane with its sign
    // word; presumably that is the 32-bit form `cvtpi2ps` consumes — confirm
    // against the intrinsic's definition.
    let upper_pair = mmx::_mm_unpackhi_pi16(a, sign_mask);
    let upper_only = cvtpi2ps(i586::_mm_setzero_ps(), mem::transmute(upper_pair));

    // NOTE(review): `_mm_movelh_ps(x, x)` presumably moves the two lanes just
    // converted into the high half, freeing the low half for the second
    // conversion — confirm.
    let shifted = i586::_mm_movelh_ps(upper_only, upper_only);

    let lower_pair = mmx::_mm_unpacklo_pi16(a, sign_mask);
    cvtpi2ps(shifted, mem::transmute(lower_pair))
}

/// Converts the four unsigned 16-bit lanes of a 64-bit vector into a
/// 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu16_ps(a: u16x4) -> f32x4 {
    // Unsigned input needs no sign mask: every lane is interleaved with a
    // zero word instead.
    let zeros = mem::transmute(mmx::_mm_setzero_si64());

    let upper_pair = mmx::_mm_unpackhi_pi16(a.as_i16x4(), zeros);
    let upper_only = cvtpi2ps(i586::_mm_setzero_ps(), mem::transmute(upper_pair));

    // NOTE(review): `_mm_movelh_ps(x, x)` presumably moves the two lanes just
    // converted into the high half, freeing the low half for the second
    // conversion — confirm.
    let shifted = i586::_mm_movelh_ps(upper_only, upper_only);

    let lower_pair = mmx::_mm_unpacklo_pi16(a.as_i16x4(), zeros);
    cvtpi2ps(shifted, mem::transmute(lower_pair))
}

/// Converts the lower four signed 8-bit lanes of a 64-bit vector of
/// [8 x i8] into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpi8_ps(a: i8x8) -> f32x4 {
    // `0 > lane` holds exactly for the negative lanes, so the comparison
    // leaves 0xFF in those lanes and 0 in the others.
    let sign_mask =
        mmx::_mm_cmpgt_pi8(mem::transmute(mmx::_mm_setzero_si64()), a);

    // Pair each of the low four bytes with its sign byte, then hand the
    // result to the 16-bit conversion reinterpreted as [4 x i16].
    let widened = mmx::_mm_unpacklo_pi8(a, sign_mask);
    _mm_cvtpi16_ps(mem::transmute(widened))
}

/// Converts the lower four unsigned 8-bit lanes of a 64-bit vector of
/// [8 x u8] into a 128-bit vector of [4 x float].
#[inline(always)]
#[target_feature = "+sse"]
pub unsafe fn _mm_cvtpu8_ps(a: u8x8) -> f32x4 {
    // Unsigned input needs no sign mask: each of the low four bytes is paired
    // with a zero byte, and the result is handed to the 16-bit conversion
    // reinterpreted as [4 x i16].
    let zeros = mmx::_mm_setzero_si64();
    let widened = mmx::_mm_unpacklo_pi8(a.as_i8x8(), mem::transmute(zeros));
    _mm_cvtpi16_ps(mem::transmute(widened))
}

/// Converts the two 32-bit signed integer values from each 64-bit vector
/// operand of [2 x i32] into a 128-bit vector of [4 x float].
#[inline(always)]
Expand Down Expand Up @@ -507,6 +557,38 @@ mod tests {
assert_eq!(r, expected);
}

#[simd_test = "sse"]
unsafe fn _mm_cvtpi16_ps() {
    // Every lane must come back as the float with the same value.
    let input = i16x4::new(1, 2, 3, 4);
    let want = f32x4::new(1.0, 2.0, 3.0, 4.0);
    assert_eq!(sse::_mm_cvtpi16_ps(input), want);
}

#[simd_test = "sse"]
unsafe fn _mm_cvtpu16_ps() {
    // Every lane must come back as the float with the same value.
    let input = u16x4::new(1, 2, 3, 4);
    let want = f32x4::new(1.0, 2.0, 3.0, 4.0);
    assert_eq!(sse::_mm_cvtpu16_ps(input), want);
}

#[simd_test = "sse"]
unsafe fn _mm_cvtpi8_ps() {
    // Only the low four of the eight input lanes appear in the output.
    let input = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    let want = f32x4::new(1.0, 2.0, 3.0, 4.0);
    assert_eq!(sse::_mm_cvtpi8_ps(input), want);
}

#[simd_test = "sse"]
unsafe fn _mm_cvtpu8_ps() {
    // Only the low four of the eight input lanes appear in the output.
    let input = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
    let want = f32x4::new(1.0, 2.0, 3.0, 4.0);
    assert_eq!(sse::_mm_cvtpu8_ps(input), want);
}

#[simd_test = "sse"]
unsafe fn _mm_cvtpi32x2_ps() {
let a = i32x2::new(1, 2);
Expand Down

0 comments on commit 2d77311

Please sign in to comment.