From 566231dc7febd6b80bd4e925b11d1d378d31d624 Mon Sep 17 00:00:00 2001
From: hev
Date: Thu, 18 Jul 2024 09:32:45 +0800
Subject: [PATCH] [LoongArch] Remove spurious mask operations from andn->icmp on 16 and 8 bit values (#99272)

---
 .../LoongArch/LoongArchISelLowering.cpp       | 162 ++++++++++++++++++
 llvm/test/CodeGen/LoongArch/andn-icmp.ll      |  56 ++----
 2 files changed, 178 insertions(+), 40 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ba6be85c7f2e850..6072e5e244263fb 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -335,6 +335,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::SETCC);
 
   // Set DAG combine for 'LSX' feature.
 
@@ -2528,6 +2529,165 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+// Returns true if V is a load whose memory type is i8/i16, or an
+// AssertSext/AssertZext whose asserted type is i8/i16.
+//
+// ExtType is an out-parameter that is written on every path. On success it is
+// set to the load's extension type, or to SEXTLOAD/ZEXTLOAD for
+// AssertSext/AssertZext respectively. On failure it is left as NON_EXTLOAD.
+// Any other opcode makes the function return false.
+static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
+  ExtType = ISD::NON_EXTLOAD;
+
+  switch (V.getNode()->getOpcode()) {
+  case ISD::LOAD: {
+    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
+    // Only i8/i16 memory types qualify.
+    if ((LoadNode->getMemoryVT() == MVT::i8) ||
+        (LoadNode->getMemoryVT() == MVT::i16)) {
+      ExtType = LoadNode->getExtensionType();
+      return true;
+    }
+    return false;
+  }
+  case ISD::AssertSext:
+  case ISD::AssertZext: {
+    // Operand 1 is the VTSDNode naming the asserted type.
+    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+    if ((TypeNode->getVT() != MVT::i8) && (TypeNode->getVT() != MVT::i16))
+      return false;
+    ExtType = (V.getOpcode() == ISD::AssertSext) ? ISD::SEXTLOAD
+                                                 : ISD::ZEXTLOAD;
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+// Eliminate redundant truncation and zero-extension nodes.
+// * Case 1:
+//  +------------+ +------------+           +------------+
+//  |   Input1   | |   Input2   |           |     CC     |
+//  +------------+ +------------+           +------------+
+//        |              |                        |
+//        V              V                   +----+
+//  +------------+ +------------+            |
+//  |  TRUNCATE  | |  TRUNCATE  |            |
+//  +------------+ +------------+            |
+//        |              |                   |
+//        V              V                   |
+//  +------------+ +------------+            |
+//  |  ZERO_EXT  | |  ZERO_EXT  |            |
+//  +------------+ +------------+            |
+//        |              |                   |
+//        |              +-------------+     |
+//        V              V             |     |
+//       +----------------+            |     |
+//       |      AND       |            |     |
+//       +----------------+            |     |
+//               |                     |     |
+//               +---------------+     |     |
+//                               |     |     |
+//                               V     V     V
+//                              +-------------+
+//                              |     CMP     |
+//                              +-------------+
+// * Case 2:
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//        |              |              |               |              |
+//        V              |              |               |              |
+//  +------------+       |              |               |              |
+//  |    XOR     |<---------------------+               |              |
+//  +------------+       |                              |              |
+//        |              |                              |              |
+//        V              V              +---------------+              |
+//  +------------+ +------------+       |                              |
+//  |  TRUNCATE  | |  TRUNCATE  |       |    +-------------------------+
+//  +------------+ +------------+       |    |
+//        |              |              |    |
+//        V              V              |    |
+//  +------------+ +------------+       |    |
+//  |  ZERO_EXT  | |  ZERO_EXT  |       |    |
+//  +------------+ +------------+       |    |
+//        |              |              |    |
+//        V              V              |    |
+//       +----------------+             |    |
+//       |      AND       |             |    |
+//       +----------------+             |    |
+//               |                      |    |
+//               +---------------+      |    |
+//                               |      |    |
+//                               V      V    V
+//                              +-------------+
+//                              |     CMP     |
+//                              +-------------+
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const LoongArchSubtarget &Subtarget) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+  // The compared value must be (and X, (zero_extend ...)).
+  SDNode *AndNode = N->getOperand(0).getNode();
+  if (AndNode->getOpcode() != ISD::AND)
+    return SDValue();
+  SDValue AndInputValue2 = AndNode->getOperand(1);
+  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  SDValue CmpInputValue = N->getOperand(1);
+  SDValue AndInputValue1 = AndNode->getOperand(0);
+  if (AndInputValue1.getOpcode() == ISD::XOR) {
+    // Case 2: (and (xor A, -1), B) ==/!= 0 becomes (and A, B) ==/!= B.
+    if (CC != ISD::SETEQ && CC != ISD::SETNE)
+      return SDValue();
+    auto *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
+    if (!CN || CN->getSExtValue() != -1)
+      return SDValue();
+    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
+    if (!CN || CN->getSExtValue() != 0)
+      return SDValue();
+    AndInputValue1 = AndInputValue1.getOperand(0);
+    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  } else if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND ||
+             AndInputValue2 != CmpInputValue) {
+    // Case 1 needs (and (zext ..), Y) compared against that same Y.
+    return SDValue();
+  }
+
+  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
+  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
+  if (TruncValue1.getOpcode() != ISD::TRUNCATE ||
+      TruncValue2.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
+  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
+  ISD::LoadExtType ExtType1, ExtType2;
+  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
+      !checkValueWidth(TruncInputValue2, ExtType2))
+    return SDValue();
+  // The replacement nodes reuse the AND's type, so both inputs must have it.
+  if (TruncInputValue1.getValueType() != AndNode->getValueType(0) ||
+      TruncInputValue2.getValueType() != AndNode->getValueType(0))
+    return SDValue();
+  // Without the masks the high bits take part in the compare, so Input2 must
+  // be zero-extended, or both inputs sign-extended.
+  // NOTE(review): TRUNCATE width and signed predicates are unchecked; confirm.
+  if (ExtType2 != ISD::ZEXTLOAD &&
+      (ExtType1 != ISD::SEXTLOAD || ExtType2 != ISD::SEXTLOAD))
+    return SDValue();
+  // These truncation and zero-extension nodes are not necessary, remove them.
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
+                               TruncInputValue1, TruncInputValue2);
+  SDValue NewSetCC =
+      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
+  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
+  return SDValue(N, 0);
+}
+
 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, @@ -3155,6 +3315,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N, return performANDCombine(N, DAG, DCI, Subtarget); case ISD::OR: return performORCombine(N, DAG, DCI, Subtarget); + case ISD::SETCC: + return performSETCCCombine(N, DAG, DCI, Subtarget); case ISD::SRL: return performSRLCombine(N, DAG, DCI, Subtarget); case LoongArchISD::BITREV_W: diff --git a/llvm/test/CodeGen/LoongArch/andn-icmp.ll b/llvm/test/CodeGen/LoongArch/andn-icmp.ll index 4fc3c8df4664c6b..6d07e7a947297fb 100644 --- a/llvm/test/CodeGen/LoongArch/andn-icmp.ll +++ b/llvm/test/CodeGen/LoongArch/andn-icmp.ll @@ -6,14 +6,12 @@ define i1 @andn_icmp_eq_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_eq_i8: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 -; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_eq_i8: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 -; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -25,14 +23,12 @@ define i1 @andn_icmp_eq_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_eq_i16: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: sltui $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_eq_i16: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: sltui $a0, $a0, 1 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -80,14 +76,12 @@ define i1 @andn_icmp_ne_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ne_i8: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 -; LA32-NEXT: andi $a0, $a0, 255 ; LA32-NEXT: sltu $a0, $zero, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ne_i8: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 -; LA64-NEXT: andi $a0, $a0, 255 ; LA64-NEXT: sltu $a0, $zero, $a0 ; 
LA64-NEXT: ret %and = and i8 %a, %b @@ -99,14 +93,12 @@ define i1 @andn_icmp_ne_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ne_i16: ; LA32: # %bb.0: ; LA32-NEXT: andn $a0, $a1, $a0 -; LA32-NEXT: bstrpick.w $a0, $a0, 15, 0 ; LA32-NEXT: sltu $a0, $zero, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ne_i16: ; LA64: # %bb.0: ; LA64-NEXT: andn $a0, $a1, $a0 -; LA64-NEXT: bstrpick.d $a0, $a0, 15, 0 ; LA64-NEXT: sltu $a0, $zero, $a0 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -153,15 +145,13 @@ define i1 @andn_icmp_ne_i64(i64 %a, i64 %b) nounwind { define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ult_i8: ; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ult_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -172,15 +162,13 @@ define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ult_i16: ; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ult_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -191,16 +179,14 @@ define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_uge_i8: ; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a0, $a1 ; 
LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_uge_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -212,16 +198,14 @@ define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_uge_i16: ; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a0, $a1 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_uge_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a0, $a1 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -233,15 +217,13 @@ define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ugt_i8: ; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ugt_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: ret %and = and i8 %a, %b @@ -252,15 +234,13 @@ define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ugt_i16: ; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ugt_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, 
$a0, $a1 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: ret %and = and i16 %a, %b @@ -271,16 +251,14 @@ define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind { define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ule_i8: ; LA32: # %bb.0: -; LA32-NEXT: andi $a1, $a1, 255 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ule_i8: ; LA64: # %bb.0: -; LA64-NEXT: andi $a1, $a1, 255 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret @@ -292,16 +270,14 @@ define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind { define i1 @andn_icmp_ule_i16(i16 signext %a, i16 signext %b) nounwind { ; LA32-LABEL: andn_icmp_ule_i16: ; LA32: # %bb.0: -; LA32-NEXT: bstrpick.w $a1, $a1, 15, 0 -; LA32-NEXT: and $a0, $a1, $a0 +; LA32-NEXT: and $a0, $a0, $a1 ; LA32-NEXT: sltu $a0, $a1, $a0 ; LA32-NEXT: xori $a0, $a0, 1 ; LA32-NEXT: ret ; ; LA64-LABEL: andn_icmp_ule_i16: ; LA64: # %bb.0: -; LA64-NEXT: bstrpick.d $a1, $a1, 15, 0 -; LA64-NEXT: and $a0, $a1, $a0 +; LA64-NEXT: and $a0, $a0, $a1 ; LA64-NEXT: sltu $a0, $a1, $a0 ; LA64-NEXT: xori $a0, $a0, 1 ; LA64-NEXT: ret