Skip to content

Commit

Permalink
add neon instruction vaddlv_* (rust-lang#1129)
Browse files Browse the repository at this point in the history
  • Loading branch information
surechen authored Apr 20, 2021
1 parent d23e2a4 commit 325d722
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 0 deletions.
168 changes: 168 additions & 0 deletions crates/core_arch/src/aarch64/neon/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2876,6 +2876,110 @@ pub unsafe fn vsubq_f64(a: float64x2_t, b: float64x2_t) -> float64x2_t {
simd_sub(a, b)
}

/// Signed Add Long across Vector
///
/// Widens each 16-bit lane of `a` to 32 bits and sums all four lanes,
/// returning the total as a single `i32` (AArch64 `SADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddlv))]
pub unsafe fn vaddlv_s16(a: int16x4_t) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 4 x i16 -> i32.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v4i16")]
fn vaddlv_s16_(a: int16x4_t) -> i32;
}
vaddlv_s16_(a)
}

/// Signed Add Long across Vector
///
/// Widens each 16-bit lane of `a` to 32 bits and sums all eight lanes,
/// returning the total as a single `i32` (AArch64 `SADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddlv))]
pub unsafe fn vaddlvq_s16(a: int16x8_t) -> i32 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 8 x i16 -> i32.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i32.v8i16")]
fn vaddlvq_s16_(a: int16x8_t) -> i32;
}
vaddlvq_s16_(a)
}

/// Signed Add Long across Vector
///
/// Widens each 32-bit lane of `a` to 64 bits and sums both lanes,
/// returning the total as a single `i64`.
#[inline]
#[target_feature(enable = "neon")]
// NOTE: for the two-lane case LLVM emits SADDLP (signed add long pairwise)
// rather than SADDLV, so that is the instruction asserted here.
#[cfg_attr(test, assert_instr(saddlp))]
pub unsafe fn vaddlv_s32(a: int32x2_t) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 2 x i32 -> i64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v2i32")]
fn vaddlv_s32_(a: int32x2_t) -> i64;
}
vaddlv_s32_(a)
}

/// Signed Add Long across Vector
///
/// Widens each 32-bit lane of `a` to 64 bits and sums all four lanes,
/// returning the total as a single `i64` (AArch64 `SADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddlv))]
pub unsafe fn vaddlvq_s32(a: int32x4_t) -> i64 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 4 x i32 -> i64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.saddlv.i64.v4i32")]
fn vaddlvq_s32_(a: int32x4_t) -> i64;
}
vaddlvq_s32_(a)
}

/// Unsigned Add Long across Vector
///
/// Widens each 16-bit lane of `a` to 32 bits and sums all four lanes,
/// returning the total as a single `u32` (AArch64 `UADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddlv))]
pub unsafe fn vaddlv_u16(a: uint16x4_t) -> u32 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 4 x u16 -> u32.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v4i16")]
fn vaddlv_u16_(a: uint16x4_t) -> u32;
}
vaddlv_u16_(a)
}

/// Unsigned Add Long across Vector
///
/// Widens each 16-bit lane of `a` to 32 bits and sums all eight lanes,
/// returning the total as a single `u32` (AArch64 `UADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddlv))]
pub unsafe fn vaddlvq_u16(a: uint16x8_t) -> u32 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 8 x u16 -> u32.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i32.v8i16")]
fn vaddlvq_u16_(a: uint16x8_t) -> u32;
}
vaddlvq_u16_(a)
}

/// Unsigned Add Long across Vector
///
/// Widens each 32-bit lane of `a` to 64 bits and sums both lanes,
/// returning the total as a single `u64`.
#[inline]
#[target_feature(enable = "neon")]
// NOTE: for the two-lane case LLVM emits UADDLP (unsigned add long pairwise)
// rather than UADDLV, so that is the instruction asserted here.
#[cfg_attr(test, assert_instr(uaddlp))]
pub unsafe fn vaddlv_u32(a: uint32x2_t) -> u64 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 2 x u32 -> u64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v2i32")]
fn vaddlv_u32_(a: uint32x2_t) -> u64;
}
vaddlv_u32_(a)
}

/// Unsigned Add Long across Vector
///
/// Widens each 32-bit lane of `a` to 64 bits and sums all four lanes,
/// returning the total as a single `u64` (AArch64 `UADDLV` instruction).
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddlv))]
pub unsafe fn vaddlvq_u32(a: uint32x4_t) -> u64 {
#[allow(improper_ctypes)]
extern "C" {
// Binding to LLVM's widening add-across-vector reduction: 4 x u32 -> u64.
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uaddlv.i64.v4i32")]
fn vaddlvq_u32_(a: uint32x4_t) -> u64;
}
vaddlvq_u32_(a)
}

/// Signed Subtract Wide
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -8458,6 +8562,70 @@ mod test {
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_s16() {
// Expected: 1 + 2 + 3 + 4 = 10, widened into an i32 result.
let a: i16x4 = i16x4::new(1, 2, 3, 4);
let e: i32 = 10;
let r: i32 = transmute(vaddlv_s16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_s16() {
// Expected: 1 + 2 + ... + 8 = 36, widened into an i32 result.
let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let e: i32 = 36;
let r: i32 = transmute(vaddlvq_s16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_s32() {
// Expected: 1 + 2 = 3, widened into an i64 result.
let a: i32x2 = i32x2::new(1, 2);
let e: i64 = 3;
let r: i64 = transmute(vaddlv_s32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_s32() {
// Expected: 1 + 2 + 3 + 4 = 10, widened into an i64 result.
let a: i32x4 = i32x4::new(1, 2, 3, 4);
let e: i64 = 10;
let r: i64 = transmute(vaddlvq_s32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_u16() {
// Expected: 1 + 2 + 3 + 4 = 10, widened into a u32 result.
let a: u16x4 = u16x4::new(1, 2, 3, 4);
let e: u32 = 10;
let r: u32 = transmute(vaddlv_u16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_u16() {
// Expected: 1 + 2 + ... + 8 = 36, widened into a u32 result.
let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let e: u32 = 36;
let r: u32 = transmute(vaddlvq_u16(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_u32() {
// Expected: 1 + 2 = 3, widened into a u64 result.
let a: u32x2 = u32x2::new(1, 2);
let e: u64 = 3;
let r: u64 = transmute(vaddlv_u32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_u32() {
// Expected: 1 + 2 + 3 + 4 = 10, widened into a u64 result.
let a: u32x4 = u32x4::new(1, 2, 3, 4);
let e: u64 = 10;
let r: u64 = transmute(vaddlvq_u32(transmute(a)));
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vsubw_high_s8() {
let a: i16x8 = i16x8::new(8, 9, 10, 12, 13, 14, 15, 16);
Expand Down
67 changes: 67 additions & 0 deletions crates/core_arch/src/aarch64/neon/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,15 @@ extern "C" {
#[link_name = "llvm.aarch64.neon.uaddv.i64.v2i64"]
fn vaddvq_u64_(a: uint64x2_t) -> u64;

#[link_name = "llvm.aarch64.neon.saddlv.i32.v8i8"]
fn vaddlv_s8_(a: int8x8_t) -> i32;
#[link_name = "llvm.aarch64.neon.uaddlv.i32.v8i8"]
fn vaddlv_u8_(a: uint8x8_t) -> u32;
#[link_name = "llvm.aarch64.neon.saddlv.i32.v16i8"]
fn vaddlvq_s8_(a: int8x16_t) -> i32;
#[link_name = "llvm.aarch64.neon.uaddlv.i32.v16i8"]
fn vaddlvq_u8_(a: uint8x16_t) -> u32;

#[link_name = "llvm.aarch64.neon.smaxv.i8.v8i8"]
fn vmaxv_s8_(a: int8x8_t) -> i8;
#[link_name = "llvm.aarch64.neon.smaxv.i8.6i8"]
Expand Down Expand Up @@ -1000,6 +1009,35 @@ pub unsafe fn vaddvq_u64(a: uint64x2_t) -> u64 {
vaddvq_u64_(a)
}

/// Signed Add Long across Vector
///
/// Widens every 8-bit lane of `a` to 16 bits, adds all eight lanes
/// together, and returns the scalar total.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddlv))]
pub unsafe fn vaddlv_s8(a: int8x8_t) -> i16 {
    // The LLVM intrinsic produces an i32, but the sum of eight i8 lanes
    // always lies in -1024..=1016, so narrowing to i16 is lossless.
    let widened_sum: i32 = vaddlv_s8_(a);
    widened_sum as i16
}
/// Signed Add Long across Vector
///
/// Widens every 8-bit lane of `a` to 16 bits, adds all sixteen lanes
/// together, and returns the scalar total.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(saddlv))]
pub unsafe fn vaddlvq_s8(a: int8x16_t) -> i16 {
    // The LLVM intrinsic produces an i32, but the sum of sixteen i8 lanes
    // always lies in -2048..=2032, so narrowing to i16 is lossless.
    let widened_sum: i32 = vaddlvq_s8_(a);
    widened_sum as i16
}
/// Unsigned Add Long across Vector
///
/// Widens every 8-bit lane of `a` to 16 bits, adds all eight lanes
/// together, and returns the scalar total.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddlv))]
pub unsafe fn vaddlv_u8(a: uint8x8_t) -> u16 {
    // The LLVM intrinsic produces a u32, but the sum of eight u8 lanes
    // is at most 8 * 255 = 2040, so narrowing to u16 is lossless.
    let widened_sum: u32 = vaddlv_u8_(a);
    widened_sum as u16
}
/// Unsigned Add Long across Vector
///
/// Widens every 8-bit lane of `a` to 16 bits, adds all sixteen lanes
/// together, and returns the scalar total.
#[inline]
#[target_feature(enable = "neon")]
#[cfg_attr(test, assert_instr(uaddlv))]
pub unsafe fn vaddlvq_u8(a: uint8x16_t) -> u16 {
    // The LLVM intrinsic produces a u32, but the sum of sixteen u8 lanes
    // is at most 16 * 255 = 4080, so narrowing to u16 is lossless.
    let widened_sum: u32 = vaddlvq_u8_(a);
    widened_sum as u16
}

/// Polynomial multiply long
#[inline]
#[target_feature(enable = "neon")]
Expand Down Expand Up @@ -4367,6 +4405,35 @@ mod tests {
let e = 3_u64;
assert_eq!(r, e);
}

#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_s8() {
// Expected: 1+2+3+4+5+6+7-8 = 20; the negative lane exercises signed widening.
let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, -8);
let r: i16 = vaddlv_s8(transmute(a));
let e = 20_i16;
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vaddlv_u8() {
// Expected: 1 + 2 + ... + 8 = 36.
let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
let r: u16 = vaddlv_u8(transmute(a));
let e = 36_u16;
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_s8() {
// Expected: (1 + 2 + ... + 15) - 16 = 120 - 16 = 104; the negative lane
// exercises signed widening.
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -16);
let r: i16 = vaddlvq_s8(transmute(a));
let e = 104_i16;
assert_eq!(r, e);
}
#[simd_test(enable = "neon")]
unsafe fn test_vaddlvq_u8() {
// Expected: 1 + 2 + ... + 16 = 136.
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r: u16 = vaddlvq_u8(transmute(a));
let e = 136_u16;
assert_eq!(r, e);
}
}

#[cfg(test)]
Expand Down
72 changes: 72 additions & 0 deletions crates/stdarch-gen/neon.spec
Original file line number Diff line number Diff line change
Expand Up @@ -1655,6 +1655,78 @@ generate float64x*_t
arm = vsub.
generate float*_t

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x4_t:i32

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 36

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i32._EXT_
generate int16x8_t:i32

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2
validate 3

aarch64 = saddlp
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x2_t:i64

/// Signed Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = saddlv
link-aarch64 = llvm.aarch64.neon.saddlv.i64._EXT_
generate int32x4_t:i64

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x4_t:u32

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4, 5, 6, 7, 8
validate 36

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i32._EXT_
generate uint16x8_t:u32

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2
validate 3

aarch64 = uaddlp
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x2_t:u64

/// Unsigned Add Long across Vector
name = vaddlv
a = 1, 2, 3, 4
validate 10

aarch64 = uaddlv
link-aarch64 = llvm.aarch64.neon.uaddlv.i64._EXT_
generate uint32x4_t:u64

/// Subtract returning high narrow
name = vsubhn
no-q
Expand Down

0 comments on commit 325d722

Please sign in to comment.