Skip to content

Commit

Permalink
Use packed_simd::shuffle instead of vqtbx1q_u8
Browse files (browse the repository at this point in the history)
  • Loading branch information
rubdos committed Dec 8, 2022
1 parent d60beaa commit 343be3a
Showing 1 changed file with 6 additions and 11 deletions.
17 changes: 6 additions & 11 deletions src/backend/vector/neon/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,33 +212,28 @@ impl FieldElement2625x4 {
#[inline(always)]
fn blend_lanes(x: (u32x4, u32x4), y: (u32x4, u32x4), control: Lanes) -> (u32x4, u32x4) {
unsafe {
use core::arch::aarch64::vqtbx1q_u8;
use packed_simd::shuffle;
match control {
Lanes::C => {
(x.0,
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits())
(x.0, shuffle!(y.1, x.1, [0, 5, 2, 7]))
}
Lanes::D => {
(x.0,
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits()).into_bits())
(x.0, shuffle!(y.1, x.1, [4, 1, 6, 3]))
}
Lanes::AD => {
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits(),
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits())
(shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [4, 1, 6, 3]))
}
Lanes::AB => {
(y.0, x.1)
}
Lanes::AC => {
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits(),
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits()).into_bits())
(shuffle!(y.0, x.0, [0, 5, 2, 7]), shuffle!(y.1, x.1, [0, 5, 2, 7]))
}
Lanes::CD => {
(x.0, y.1)
}
Lanes::BC => {
(vqtbx1q_u8(x.0.into_bits(), y.0.into_bits(), u8x16::new(16, 16, 16, 16, 4, 5, 6, 7, 16, 16, 16, 16, 12, 13, 14, 15).into_bits() ).into_bits(),
vqtbx1q_u8(x.1.into_bits(), y.1.into_bits(), u8x16::new( 0, 1, 2, 3, 16, 16, 16, 16, 8, 9, 10, 11, 16, 16, 16, 16).into_bits() ).into_bits())
(shuffle!(y.0, x.0, [4, 1, 6, 3]), shuffle!(y.1, x.1, [0, 5, 2, 7]))
}
Lanes::ABCD => {
y
Expand Down

0 comments on commit 343be3a

Please sign in to comment.