Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ARM] Add NEON support for ISD::ABDS/ABDU nodes. #94504

Merged
merged 1 commit into from
Jun 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 12 additions & 4 deletions llvm/lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,9 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
setOperationAction(ISD::SDIVREM, VT, Expand);
setOperationAction(ISD::UDIVREM, VT, Expand);

if (!VT.isFloatingPoint() &&
VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
for (auto Opcode : {ISD::ABS, ISD::ABDS, ISD::ABDU, ISD::SMIN, ISD::SMAX,
ISD::UMIN, ISD::UMAX})
setOperationAction(Opcode, VT, Legal);
if (!VT.isFloatingPoint())
for (auto Opcode : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
Expand Down Expand Up @@ -4174,7 +4174,15 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
}
case Intrinsic::arm_neon_vabs:
return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
Op.getOperand(1));
case Intrinsic::arm_neon_vabds:
if (Op.getValueType().isInteger())
return DAG.getNode(ISD::ABDS, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
return SDValue();
case Intrinsic::arm_neon_vabdu:
return DAG.getNode(ISD::ABDU, SDLoc(Op), Op.getValueType(),
Op.getOperand(1), Op.getOperand(2));
case Intrinsic::arm_neon_vmulls:
case Intrinsic::arm_neon_vmullu: {
unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
Expand Down
24 changes: 13 additions & 11 deletions llvm/lib/Target/ARM/ARMInstrNEON.td
Original file line number Diff line number Diff line change
Expand Up @@ -5640,10 +5640,10 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
// VABD : Vector Absolute Difference
defm VABDs : N3VInt_QHS<0, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "s", int_arm_neon_vabds, 1>;
"vabd", "s", abds, 1>;
defm VABDu : N3VInt_QHS<1, 0, 0b0111, 0, N3RegFrm,
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q,
"vabd", "u", int_arm_neon_vabdu, 1>;
"vabd", "u", abdu, 1>;
def VABDfd : N3VDInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBIND,
"vabd", "f32", v2f32, v2f32, int_arm_neon_vabds, 1>;
def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ,
Expand All @@ -5657,20 +5657,22 @@ def VABDhq : N3VQInt<1, 0, 0b11, 0b1101, 0, N3RegFrm, IIC_VBINQ,

// VABDL : Vector Absolute Difference Long (Q = | D - D |)
defm VABDLs : N3VLIntExt_QHS<0,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "s", int_arm_neon_vabds, zext, 1>;
"vabdl", "s", abds, zext, 1>;
defm VABDLu : N3VLIntExt_QHS<1,1,0b0111,0, IIC_VSUBi4Q,
"vabdl", "u", int_arm_neon_vabdu, zext, 1>;
"vabdl", "u", abdu, zext, 1>;

let Predicates = [HasNEON] in {
def : Pat<(v8i16 (abs (sub (zext (v8i8 DPR:$opA)), (zext (v8i8 DPR:$opB))))),
def : Pat<(v8i16 (zext (abdu (v8i8 DPR:$opA), (v8i8 DPR:$opB)))),
(VABDLuv8i16 DPR:$opA, DPR:$opB)>;
def : Pat<(v4i32 (abs (sub (zext (v4i16 DPR:$opA)), (zext (v4i16 DPR:$opB))))),
def : Pat<(v4i32 (zext (abdu (v4i16 DPR:$opA), (v4i16 DPR:$opB)))),
(VABDLuv4i32 DPR:$opA, DPR:$opB)>;
def : Pat<(v2i64 (zext (abdu (v2i32 DPR:$opA), (v2i32 DPR:$opB)))),
(VABDLuv2i64 DPR:$opA, DPR:$opB)>;
}

// ISD::ABS is not legal for v2i64, so VABDL needs to be matched from the
// shift/xor pattern for ABS.

// TODO: Remove me.
def abd_shr :
PatFrag<(ops node:$in1, node:$in2, node:$shift),
(ARMvshrsImm (sub (zext node:$in1),
Expand All @@ -5686,15 +5688,15 @@ def : Pat<(xor (v2i64 (abd_shr (v2i32 DPR:$opA), (v2i32 DPR:$opB), 63)),

// VABA : Vector Absolute Difference and Accumulate
defm VABAs : N3VIntOp_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "s", int_arm_neon_vabds, add>;
"vaba", "s", abds, add>;
defm VABAu : N3VIntOp_QHS<1,0,0b0111,1, IIC_VABAD, IIC_VABAQ,
"vaba", "u", int_arm_neon_vabdu, add>;
"vaba", "u", abdu, add>;

// VABAL : Vector Absolute Difference and Accumulate Long (Q += | D - D |)
defm VABALs : N3VLIntExtOp_QHS<0,1,0b0101,0, IIC_VABAD,
"vabal", "s", int_arm_neon_vabds, zext, add>;
"vabal", "s", abds, zext, add>;
defm VABALu : N3VLIntExtOp_QHS<1,1,0b0101,0, IIC_VABAD,
"vabal", "u", int_arm_neon_vabdu, zext, add>;
"vabal", "u", abdu, zext, add>;

// Vector Maximum and Minimum.

Expand Down
Loading
Loading