Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[LoongArch] Recommit "Remove spurious mask operations from andn->icmp on 16 and 8 bit values" #99798

Merged
merged 1 commit into from
Jul 22, 2024

Conversation

heiher
Copy link
Member

@heiher heiher commented Jul 21, 2024

recommit of #99272

@llvmbot
Copy link
Collaborator

llvmbot commented Jul 21, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/99798.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+166)
  • (modified) llvm/test/CodeGen/LoongArch/andn-icmp.ll (+16-41)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ba6be85c7f2e8..496f126b7173d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -335,6 +335,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::SETCC);
 
   // Set DAG combine for 'LSX' feature.
 
@@ -2528,6 +2529,169 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
+  ExtType = ISD::NON_EXTLOAD;
+
+  switch (V.getNode()->getOpcode()) {
+  case ISD::LOAD: {
+    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
+    if ((LoadNode->getMemoryVT() == MVT::i8) ||
+        (LoadNode->getMemoryVT() == MVT::i16)) {
+      ExtType = LoadNode->getExtensionType();
+      return true;
+    }
+    return false;
+  }
+  case ISD::AssertSext: {
+    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
+      ExtType = ISD::SEXTLOAD;
+      return true;
+    }
+    return false;
+  }
+  case ISD::AssertZext: {
+    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
+      ExtType = ISD::ZEXTLOAD;
+      return true;
+    }
+    return false;
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+// Eliminate redundant truncation and zero-extension nodes.
+// * Case 1:
+//  +------------+ +------------+ +------------+
+//  |   Input1   | |   Input2   | |     CC     |
+//  +------------+ +------------+ +------------+
+//         |              |              |
+//         V              V              +----+
+//  +------------+ +------------+             |
+//  |  TRUNCATE  | |  TRUNCATE  |             |
+//  +------------+ +------------+             |
+//         |              |                   |
+//         V              V                   |
+//  +------------+ +------------+             |
+//  |  ZERO_EXT  | |  ZERO_EXT  |             |
+//  +------------+ +------------+             |
+//         |              |                   |
+//         |              +-------------+     |
+//         V              V             |     |
+//        +----------------+            |     |
+//        |      AND       |            |     |
+//        +----------------+            |     |
+//                |                     |     |
+//                +---------------+     |     |
+//                                |     |     |
+//                                V     V     V
+//                               +-------------+
+//                               |     CMP     |
+//                               +-------------+
+// * Case 2:
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//         |              |             |               |               |
+//         V              |             |               |               |
+//  +------------+        |             |               |               |
+//  |     XOR    |<---------------------+               |               |
+//  +------------+        |                             |               |
+//         |              |                             |               |
+//         V              V             +---------------+               |
+//  +------------+ +------------+       |                               |
+//  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
+//  +------------+ +------------+       |     |
+//         |              |             |     |
+//         V              V             |     |
+//  +------------+ +------------+       |     |
+//  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
+//  +------------+ +------------+       |     |
+//         |              |             |     |
+//         V              V             |     |
+//        +----------------+            |     |
+//        |      AND       |            |     |
+//        +----------------+            |     |
+//                |                     |     |
+//                +---------------+     |     |
+//                                |     |     |
+//                                V     V     V
+//                               +-------------+
+//                               |     CMP     |
+//                               +-------------+
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const LoongArchSubtarget &Subtarget) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  SDNode *AndNode = N->getOperand(0).getNode();
+  if (AndNode->getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue AndInputValue2 = AndNode->getOperand(1);
+  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  SDValue CmpInputValue = N->getOperand(1);
+  SDValue AndInputValue1 = AndNode->getOperand(0);
+  if (AndInputValue1.getOpcode() == ISD::XOR) {
+    if (CC != ISD::SETEQ && CC != ISD::SETNE)
+      return SDValue();
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
+    if (!CN || CN->getSExtValue() != -1)
+      return SDValue();
+    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
+    if (!CN || CN->getSExtValue() != 0)
+      return SDValue();
+    AndInputValue1 = AndInputValue1.getOperand(0);
+    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
+    if (AndInputValue2 != CmpInputValue)
+      return SDValue();
+  } else {
+    return SDValue();
+  }
+
+  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
+  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
+  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
+  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
+  ISD::LoadExtType ExtType1;
+  ISD::LoadExtType ExtType2;
+
+  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
+      !checkValueWidth(TruncInputValue2, ExtType2))
+    return SDValue();
+
+  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
+      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
+    return SDValue();
+
+  if ((ExtType2 != ISD::ZEXTLOAD) &&
+      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
+    return SDValue();
+
+  // These truncation and zero-extension nodes are not necessary, remove them.
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
+                               TruncInputValue1, TruncInputValue2);
+  SDValue NewSetCC =
+      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
+  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
+  return SDValue(N, 0);
+}
+
 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
@@ -3155,6 +3319,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performANDCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
     return performORCombine(N, DAG, DCI, Subtarget);
+  case ISD::SETCC:
+    return performSETCCCombine(N, DAG, DCI, Subtarget);
   case ISD::SRL:
     return performSRLCombine(N, DAG, DCI, Subtarget);
   case LoongArchISD::BITREV_W:
diff --git a/llvm/test/CodeGen/LoongArch/andn-icmp.ll b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
index 447f3ac5c34fd..67d62eca914e9 100644
--- a/llvm/test/CodeGen/LoongArch/andn-icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
@@ -6,14 +6,12 @@ define i1 @andn_icmp_eq_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -25,14 +23,12 @@ define i1 @andn_icmp_eq_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -80,14 +76,12 @@ define i1 @andn_icmp_ne_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -99,14 +93,12 @@ define i1 @andn_icmp_ne_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -153,15 +145,13 @@ define i1 @andn_icmp_ne_i64(i64 %a, i64 %b) nounwind {
 define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -172,15 +162,13 @@ define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -191,16 +179,14 @@ define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -212,16 +198,14 @@ define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -233,15 +217,13 @@ define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -252,15 +234,13 @@ define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -271,16 +251,14 @@ define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -292,16 +270,14 @@ define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ule_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -606,7 +582,6 @@ define i1 @andn_icmp_eq_i8_i32(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i8_i32:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;

@heiher heiher merged commit 4c73b1a into llvm:main Jul 22, 2024
9 checks passed
@heiher heiher deleted the spurious-mask-removal branch July 22, 2024 07:10
sgundapa pushed a commit to sgundapa/upstream_effort that referenced this pull request Jul 23, 2024
yuxuanchen1997 pushed a commit that referenced this pull request Jul 25, 2024
… on 16 and 8 bit values" (#99798)

Summary: recommit of #99272

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251358
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants