Skip to content

Commit

Permalink
core_arch: Add LoongArch frecipe intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
heiher authored and Amanieu committed Oct 14, 2024
1 parent 1e6ee7c commit 0669fa8
Show file tree
Hide file tree
Showing 4 changed files with 184 additions and 12 deletions.
48 changes: 42 additions & 6 deletions crates/core_arch/src/loongarch64/lasx/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,14 @@ extern "unadjusted" {
fn __lasx_xvfrecip_s(a: v8f32) -> v8f32;
#[link_name = "llvm.loongarch.lasx.xvfrecip.d"]
fn __lasx_xvfrecip_d(a: v4f64) -> v4f64;
#[link_name = "llvm.loongarch.lasx.xvfrecipe.s"]
fn __lasx_xvfrecipe_s(a: v8f32) -> v8f32;
#[link_name = "llvm.loongarch.lasx.xvfrecipe.d"]
fn __lasx_xvfrecipe_d(a: v4f64) -> v4f64;
#[link_name = "llvm.loongarch.lasx.xvfrsqrte.s"]
fn __lasx_xvfrsqrte_s(a: v8f32) -> v8f32;
#[link_name = "llvm.loongarch.lasx.xvfrsqrte.d"]
fn __lasx_xvfrsqrte_d(a: v4f64) -> v4f64;
#[link_name = "llvm.loongarch.lasx.xvfrint.s"]
fn __lasx_xvfrint_s(a: v8f32) -> v8f32;
#[link_name = "llvm.loongarch.lasx.xvfrint.d"]
Expand Down Expand Up @@ -941,15 +949,15 @@ extern "unadjusted" {
#[link_name = "llvm.loongarch.lasx.xvld"]
fn __lasx_xvld(a: *const i8, b: i32) -> v32i8;
#[link_name = "llvm.loongarch.lasx.xvst"]
fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32) ;
fn __lasx_xvst(a: v32i8, b: *mut i8, c: i32);
#[link_name = "llvm.loongarch.lasx.xvstelm.b"]
fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32) ;
fn __lasx_xvstelm_b(a: v32i8, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lasx.xvstelm.h"]
fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32) ;
fn __lasx_xvstelm_h(a: v16i16, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lasx.xvstelm.w"]
fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32) ;
fn __lasx_xvstelm_w(a: v8i32, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lasx.xvstelm.d"]
fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32) ;
fn __lasx_xvstelm_d(a: v4i64, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lasx.xvinsve0.w"]
fn __lasx_xvinsve0_w(a: v8i32, b: v8i32, c: u32) -> v8i32;
#[link_name = "llvm.loongarch.lasx.xvinsve0.d"]
Expand Down Expand Up @@ -977,7 +985,7 @@ extern "unadjusted" {
#[link_name = "llvm.loongarch.lasx.xvldx"]
fn __lasx_xvldx(a: *const i8, b: i64) -> v32i8;
#[link_name = "llvm.loongarch.lasx.xvstx"]
fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64) ;
fn __lasx_xvstx(a: v32i8, b: *mut i8, c: i64);
#[link_name = "llvm.loongarch.lasx.xvextl.qu.du"]
fn __lasx_xvextl_qu_du(a: v4u64) -> v4u64;
#[link_name = "llvm.loongarch.lasx.xvinsgr2vr.w"]
Expand Down Expand Up @@ -3979,6 +3987,34 @@ pub unsafe fn lasx_xvfrecip_d(a: v4f64) -> v4f64 {
__lasx_xvfrecip_d(a)
}

#[inline]
#[target_feature(enable = "lasx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lasx_xvfrecipe_s(a: v8f32) -> v8f32 {
__lasx_xvfrecipe_s(a)
}

#[inline]
#[target_feature(enable = "lasx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lasx_xvfrecipe_d(a: v4f64) -> v4f64 {
__lasx_xvfrecipe_d(a)
}

#[inline]
#[target_feature(enable = "lasx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lasx_xvfrsqrte_s(a: v8f32) -> v8f32 {
__lasx_xvfrsqrte_s(a)
}

#[inline]
#[target_feature(enable = "lasx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lasx_xvfrsqrte_d(a: v4f64) -> v4f64 {
__lasx_xvfrsqrte_d(a)
}

#[inline]
#[target_feature(enable = "lasx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
Expand Down
68 changes: 68 additions & 0 deletions crates/core_arch/src/loongarch64/lasx/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6558,6 +6558,74 @@ unsafe fn test_lasx_xvfrecip_d() {
assert_eq!(r, transmute(lasx_xvfrecip_d(transmute(a))));
}

#[simd_test(enable = "lasx,frecipe")]
unsafe fn test_lasx_xvfrecipe_s() {
let a = u32x8::new(
1061538089, 1009467584, 1043164316, 1030910448, 1059062619, 1048927856, 1064915194,
1028524176,
);
let r = i64x4::new(
4809660548434472067,
4721787188318892829,
4644815739361740708,
4728509413412007938,
);

assert_eq!(r, transmute(lasx_xvfrecipe_s(transmute(a))));
}

#[simd_test(enable = "lasx,frecipe")]
unsafe fn test_lasx_xvfrecipe_d() {
let a = u64x4::new(
4599514006383746620,
4607114589130093485,
4603063439897885463,
4602774413388259784,
);
let r = i64x4::new(
4614125529786744832,
4607216711966392320,
4610977572161847296,
4611499011256352768,
);

assert_eq!(r, transmute(lasx_xvfrecipe_d(transmute(a))));
}

#[simd_test(enable = "lasx,frecipe")]
unsafe fn test_lasx_xvfrsqrte_s() {
let a = u32x8::new(
1042369896, 1033402040, 1063640659, 1061099374, 1064617699, 1050687308, 1049602990,
1047907124,
);
let r = i64x4::new(
4641680627989561881,
4581330281566770462,
4604034110053345047,
4612427253546066334,
);

assert_eq!(r, transmute(lasx_xvfrsqrte_s(transmute(a))));
}

#[simd_test(enable = "lasx,frecipe")]
unsafe fn test_lasx_xvfrsqrte_d() {
let a = u64x4::new(
4601640737224225970,
4602882853441572005,
4594899837086694432,
4596019513190087348,
);
let r = i64x4::new(
4609450077243572224,
4608908592999825408,
4612828109287194624,
4612346183891812352,
);

assert_eq!(r, transmute(lasx_xvfrsqrte_d(transmute(a))));
}

#[simd_test(enable = "lasx")]
unsafe fn test_lasx_xvfrint_s() {
let a = u32x8::new(
Expand Down
48 changes: 42 additions & 6 deletions crates/core_arch/src/loongarch64/lsx/generated.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,6 +698,14 @@ extern "unadjusted" {
fn __lsx_vfrecip_s(a: v4f32) -> v4f32;
#[link_name = "llvm.loongarch.lsx.vfrecip.d"]
fn __lsx_vfrecip_d(a: v2f64) -> v2f64;
#[link_name = "llvm.loongarch.lsx.vfrecipe.s"]
fn __lsx_vfrecipe_s(a: v4f32) -> v4f32;
#[link_name = "llvm.loongarch.lsx.vfrecipe.d"]
fn __lsx_vfrecipe_d(a: v2f64) -> v2f64;
#[link_name = "llvm.loongarch.lsx.vfrsqrte.s"]
fn __lsx_vfrsqrte_s(a: v4f32) -> v4f32;
#[link_name = "llvm.loongarch.lsx.vfrsqrte.d"]
fn __lsx_vfrsqrte_d(a: v2f64) -> v2f64;
#[link_name = "llvm.loongarch.lsx.vfrint.s"]
fn __lsx_vfrint_s(a: v4f32) -> v4f32;
#[link_name = "llvm.loongarch.lsx.vfrint.d"]
Expand Down Expand Up @@ -959,13 +967,13 @@ extern "unadjusted" {
#[link_name = "llvm.loongarch.lsx.vfrintrm.d"]
fn __lsx_vfrintrm_d(a: v2f64) -> v2f64;
#[link_name = "llvm.loongarch.lsx.vstelm.b"]
fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32) ;
fn __lsx_vstelm_b(a: v16i8, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lsx.vstelm.h"]
fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32) ;
fn __lsx_vstelm_h(a: v8i16, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lsx.vstelm.w"]
fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32) ;
fn __lsx_vstelm_w(a: v4i32, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lsx.vstelm.d"]
fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32) ;
fn __lsx_vstelm_d(a: v2i64, b: *mut i8, c: i32, d: u32);
#[link_name = "llvm.loongarch.lsx.vaddwev.d.w"]
fn __lsx_vaddwev_d_w(a: v4i32, b: v4i32) -> v2i64;
#[link_name = "llvm.loongarch.lsx.vaddwev.w.h"]
Expand Down Expand Up @@ -1301,7 +1309,7 @@ extern "unadjusted" {
#[link_name = "llvm.loongarch.lsx.vld"]
fn __lsx_vld(a: *const i8, b: i32) -> v16i8;
#[link_name = "llvm.loongarch.lsx.vst"]
fn __lsx_vst(a: v16i8, b: *mut i8, c: i32) ;
fn __lsx_vst(a: v16i8, b: *mut i8, c: i32);
#[link_name = "llvm.loongarch.lsx.vssrlrn.b.h"]
fn __lsx_vssrlrn_b_h(a: v8i16, b: v8i16) -> v16i8;
#[link_name = "llvm.loongarch.lsx.vssrlrn.h.w"]
Expand All @@ -1323,7 +1331,7 @@ extern "unadjusted" {
#[link_name = "llvm.loongarch.lsx.vldx"]
fn __lsx_vldx(a: *const i8, b: i64) -> v16i8;
#[link_name = "llvm.loongarch.lsx.vstx"]
fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64) ;
fn __lsx_vstx(a: v16i8, b: *mut i8, c: i64);
#[link_name = "llvm.loongarch.lsx.vextl.qu.du"]
fn __lsx_vextl_qu_du(a: v2u64) -> v2u64;
#[link_name = "llvm.loongarch.lsx.bnz.b"]
Expand Down Expand Up @@ -4068,6 +4076,34 @@ pub unsafe fn lsx_vfrecip_d(a: v2f64) -> v2f64 {
__lsx_vfrecip_d(a)
}

#[inline]
#[target_feature(enable = "lsx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lsx_vfrecipe_s(a: v4f32) -> v4f32 {
__lsx_vfrecipe_s(a)
}

#[inline]
#[target_feature(enable = "lsx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lsx_vfrecipe_d(a: v2f64) -> v2f64 {
__lsx_vfrecipe_d(a)
}

#[inline]
#[target_feature(enable = "lsx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lsx_vfrsqrte_s(a: v4f32) -> v4f32 {
__lsx_vfrsqrte_s(a)
}

#[inline]
#[target_feature(enable = "lsx,frecipe")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
pub unsafe fn lsx_vfrsqrte_d(a: v2f64) -> v2f64 {
__lsx_vfrsqrte_d(a)
}

#[inline]
#[target_feature(enable = "lsx")]
#[unstable(feature = "stdarch_loongarch", issue = "117427")]
Expand Down
32 changes: 32 additions & 0 deletions crates/core_arch/src/loongarch64/lsx/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3308,6 +3308,38 @@ unsafe fn test_lsx_vfrecip_d() {
assert_eq!(r, transmute(lsx_vfrecip_d(transmute(a))));
}

#[simd_test(enable = "lsx,frecipe")]
unsafe fn test_lsx_vfrecipe_s() {
let a = u32x4::new(1057583779, 1062308847, 1060089100, 1048454688);
let r = i64x2::new(4583644530211711115, 4647978179615164140);

assert_eq!(r, transmute(lsx_vfrecipe_s(transmute(a))));
}

#[simd_test(enable = "lsx,frecipe")]
unsafe fn test_lsx_vfrecipe_d() {
let a = u64x2::new(4605515926442181274, 4605369703273365674);
let r = i64x2::new(4608204937770303488, 4608317161507651584);

assert_eq!(r, transmute(lsx_vfrecipe_d(transmute(a))));
}

#[simd_test(enable = "lsx,frecipe")]
unsafe fn test_lsx_vfrsqrte_s() {
let a = u32x4::new(1064377488, 1055815904, 1056897740, 1064016656);
let r = i64x2::new(4592421282989204764, 4577184195020153336);

assert_eq!(r, transmute(lsx_vfrsqrte_s(transmute(a))));
}

#[simd_test(enable = "lsx,frecipe")]
unsafe fn test_lsx_vfrsqrte_d() {
let a = u64x2::new(4602766865443628663, 4605323203937791867);
let r = i64x2::new(4608986772678901760, 4607734355383549952);

assert_eq!(r, transmute(lsx_vfrsqrte_d(transmute(a))));
}

#[simd_test(enable = "lsx")]
unsafe fn test_lsx_vfrint_s() {
let a = u32x4::new(1062138521, 1056849108, 1034089720, 1038314384);
Expand Down

0 comments on commit 0669fa8

Please sign in to comment.