[RISCV] Narrow indices of fixed vector gather/scatter nodes #66405
Conversation
@llvm/pr-subscribers-backend-aarch64 @llvm/pr-subscribers-backend-risc-v

Changes

Review-wise, I'm trying something different. This PR is what would have been three stacked changes in Phabricator. Having different PRs (and thus duplicated commits) seemed like a major headache, so I'm posting all three changes in one PR. I will land them individually after review. Feel free to LGTM individual changes (please be clear on which), and I will rebase as appropriate.

Patch is 62.45 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/66405.diff

12 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 12b280d5b1a0bcd..4879c0c5dcff10b 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1460,9 +1460,9 @@ class TargetLoweringBase { /// extending virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } - // Returns true if VT is a legal index type for masked gathers/scatters - // on this target - virtual bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const { + // Returns true if Extend can be folded into the index of a masked gathers/scatters + // on this target. + virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { return false; } diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cd34c0dce0f95a0..5d0d70a40e408b1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11680,10 +11680,9 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, // It's always safe to look through zero extends. if (Index.getOpcode() == ISD::ZERO_EXTEND) { - SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { + if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) { IndexType = ISD::UNSIGNED_SCALED; - Index = Op; + Index = Index.getOperand(0); return true; } if (ISD::isIndexTypeSigned(IndexType)) { @@ -11694,12 +11693,10 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT, // It's only safe to look through sign extends when Index is signed. if (Index.getOpcode() == ISD::SIGN_EXTEND && - ISD::isIndexTypeSigned(IndexType)) { - SDValue Op = Index.getOperand(0); - if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) { - Index = Op; - return true; - } + ISD::isIndexTypeSigned(IndexType) && + TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) { + Index = Index.getOperand(0); + return true; } return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index c65c52e39201ac6..337fe80d0a9018d 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5352,8 +5352,9 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; } -bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, +bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { + const EVT IndexVT = Extend.getOperand(0).getValueType(); // SVE only supports implicit extension of 32-bit indices. 
if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 67c344318e0d3ec..32970e9e45dec8f 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -1185,7 +1185,7 @@ class AArch64TargetLowering : public TargetLowering { SelectionDAG &DAG) const override; bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override; - bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; + bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; bool isVectorLoadExtDesirable(SDValue ExtVal) const override; bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override; bool mayBeEmittedAsTailCall(const CallInst *CI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a470ceae90ce591..25fe102283995cf 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11620,21 +11620,24 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, // zero-extended their indices, \p narrowIndex tries to narrow the type of index // operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C < // bits(ty). -static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { +static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) { + if (isIndexTypeSigned(IndexType)) + return false; + if (N.getOpcode() != ISD::SHL || !N->hasOneUse()) - return SDValue(); + return false; SDValue N0 = N.getOperand(0); if (N0.getOpcode() != ISD::ZERO_EXTEND && N0.getOpcode() != RISCVISD::VZEXT_VL) - return SDValue(); + return false;; if (!N0->hasOneUse()) - return SDValue(); + return false;; APInt ShAmt; SDValue N1 = N.getOperand(1); if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt)) - return SDValue(); + return false;; SDLoc DL(N); SDValue Src = N0.getOperand(0); @@ -11646,14 +11649,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) { // Skip if NewElen is not narrower than the original extended type. if (NewElen >= N0.getValueType().getScalarSizeInBits()) - return SDValue(); + return false; EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen); EVT NewVT = SrcVT.changeVectorElementType(NewEltVT); SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops()); SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT); - return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec); + return true; } // Replace (seteq (i64 (and X, 0xffffffff)), C1) with @@ -13493,19 +13497,20 @@ static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index, DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT(); const EVT IndexVT = Index.getValueType(); - const bool IsIndexSigned = isIndexTypeSigned(IndexType); // RISC-V indexed loads only support the "unsigned unscaled" addressing // mode, so anything else must be manually legalized. - if (!IsIndexSigned || !IndexVT.getVectorElementType().bitsLT(XLenVT)) + if (!isIndexTypeSigned(IndexType)) return false; - // Any index legalization should first promote to XLenVT, so we don't lose - // bits when scaling. This may create an illegal index type so we let - // LLVM's legalization take care of the splitting. - // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. 
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL, - IndexVT.changeVectorElementType(XLenVT), Index); + if (IndexVT.getVectorElementType().bitsLT(XLenVT)) { + // Any index legalization should first promote to XLenVT, so we don't lose + // bits when scaling. This may create an illegal index type so we let + // LLVM's legalization take care of the splitting. + // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet. + Index = DAG.getNode(ISD::SIGN_EXTEND, DL, + IndexVT.changeVectorElementType(XLenVT), Index); + } IndexType = ISD::UNSIGNED_SCALED; return true; } @@ -13870,6 +13875,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), MGN->getBasePtr(), Index, ScaleOp}, MGN->getMemOperand(), IndexType, MGN->getExtensionType()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedGather( + N->getVTList(), MGN->getMemoryVT(), DL, + {MGN->getChain(), MGN->getPassThru(), MGN->getMask(), + MGN->getBasePtr(), Index, ScaleOp}, + MGN->getMemOperand(), IndexType, MGN->getExtensionType()); break; } case ISD::MSCATTER:{ @@ -13887,6 +13899,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), Index, ScaleOp}, MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getMaskedScatter( + N->getVTList(), MSN->getMemoryVT(), DL, + {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(), + Index, ScaleOp}, + MSN->getMemOperand(), IndexType, MSN->isTruncatingStore()); break; } case ISD::VP_GATHER: { @@ -13904,6 +13923,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, ScaleOp, VPGN->getMask(), VPGN->getVectorLength()}, VPGN->getMemOperand(), IndexType); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL, + {VPGN->getChain(), VPGN->getBasePtr(), Index, + ScaleOp, VPGN->getMask(), + VPGN->getVectorLength()}, + VPGN->getMemOperand(), IndexType); + break; } case ISD::VP_SCATTER: { @@ -13921,6 +13948,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, VPSN->getBasePtr(), Index, ScaleOp, VPSN->getMask(), VPSN->getVectorLength()}, VPSN->getMemOperand(), IndexType); + + if (narrowIndex(Index, IndexType, DAG)) + return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL, + {VPSN->getChain(), VPSN->getValue(), + VPSN->getBasePtr(), Index, ScaleOp, + VPSN->getMask(), VPSN->getVectorLength()}, + VPSN->getMemOperand(), IndexType); break; } case RISCVISD::SRA_VL: @@ -14225,23 +14259,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return DAG.getConstant(-1, DL, VT); return DAG.getConstant(0, DL, VT); } - case Intrinsic::riscv_vloxei: - case Intrinsic::riscv_vloxei_mask: - case Intrinsic::riscv_vluxei: - case Intrinsic::riscv_vluxei_mask: - case Intrinsic::riscv_vsoxei: - case Intrinsic::riscv_vsoxei_mask: - case Intrinsic::riscv_vsuxei: - case Intrinsic::riscv_vsuxei_mask: - if (SDValue V = narrowIndex(N->getOperand(4), DAG)) { - SmallVector<SDValue, 8> Ops(N->ops()); - Ops[4] = V; - const auto *MemSD = cast<MemIntrinsicSDNode>(N); - return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(), - Ops, MemSD->getMemoryVT(), - MemSD->getMemOperand()); - } - return SDValue(); } } case ISD::BITCAST: { @@ -17677,9 +17694,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic( return Result; } -bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT, +bool 
RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const { - return false; + // We have indexed loads for all legal index types. Indices are always + // zero extended + return Extend.getOpcode() == ISD::ZERO_EXTEND && + isTypeLegal(Extend.getValueType()) && + isTypeLegal(Extend.getOperand(0).getValueType()); } bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT, diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index 461b929643f2688..695cbaf886f8aeb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -747,7 +747,7 @@ class RISCVTargetLowering : public TargetLowering { const RISCVRegisterInfo *TRI); MVT getContainerForFixedLengthVector(MVT VT) const; - bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override; + bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override; bool isLegalElementTypeForRVV(EVT ScalarTy) const; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index f3af177ac0ff27e..6c6ffe656f433b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -1716,21 +1716,19 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vadd.vv v12, v12, v12 +; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64V-NEXT: vwaddu.vv v10, v8, v8 ; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t +; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV64V-NEXT: vmv.v.v v8, v9 ; RV64V-NEXT: ret ; @@ -2793,20 +2791,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; RV32-NEXT: vzext.vf4 v12, v8 -; RV32-NEXT: vsll.vi v8, v12, 2 -; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vzext.vf2 v9, v8 +; RV32-NEXT: vsll.vi v8, v9, 2 +; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf8 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 2 ; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; 
RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -3264,11 +3263,10 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64V-NEXT: vzext.vf4 v12, v8 -; RV64V-NEXT: vsll.vi v12, v12, 2 -; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu -; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu +; RV64V-NEXT: vzext.vf2 v12, v8 +; RV64V-NEXT: vsll.vi v8, v12, 2 +; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v10 ; RV64V-NEXT: ret ; @@ -4772,20 +4770,21 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) { ; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV32V: # %bb.0: -; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32V-NEXT: vzext.vf4 v10, v8 -; RV32V-NEXT: vsll.vi v8, v10, 3 +; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32V-NEXT: vzext.vf2 v9, v8 +; RV32V-NEXT: vsll.vi v8, v9, 3 ; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t +; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV32V-NEXT: vmv.v.v v8, v12 ; RV32V-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf8 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64V-NEXT: vzext.vf2 v9, v8 +; RV64V-NEXT: vsll.vi v8, v9, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -5616,10 +5615,11 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; ; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64V-NEXT: vzext.vf4 v16, v8 -; RV64V-NEXT: vsll.vi v8, v16, 3 -; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t +; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-NEXT: vzext.vf2 v10, v8 +; RV64V-NEXT: vsll.vi v8, v10, 3 +; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t ; RV64V-NEXT: vmv.v.v v8, v12 ; RV64V-NEXT: ret ; @@ -7645,21 +7645,19 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) { ; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vzext.vf4 v10, v8 -; RV32-NEXT: vadd.vv v10, v10, v10 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vwaddu.vv v10, v8, v8 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu -; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t +; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t ; RV32-NEXT: vmv.v.v v8, v9 ; RV32-NEXT: ret ; ; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16: ; RV64V: # %bb.0: -; RV64V-NEXT: vsetivl... |
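To make the intent of the combine concrete, here is a small hand-written IR sketch of the pattern being targeted (the function name and types are illustrative, not taken from the patch): a fixed-vector gather whose index vector is a zero extend of a narrower type.

```llvm
; Illustrative only: a fixed-vector gather whose indices come from a
; zero extend of <8 x i8>. The combine lets the gather keep a narrow
; index element type instead of widening the indices to XLEN.
declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>)

define <8 x i16> @gather_zext_i8_index(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
  %eidxs = zext <8 x i8> %idxs to <8 x i64>
  %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> %eidxs
  %v = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> %ptrs, i32 2, <8 x i1> %m, <8 x i16> %passthru)
  ret <8 x i16> %v
}
```

As the updated checks in fixed-vectors-masked-gather.ll show, the RV64 lowering previously widened such indices with vzext.vf8 and used vluxei64; with the new shouldRemoveExtendFromGSIndex hook and the narrowIndex combine it can instead select vluxei16 on the narrower index vector.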
Force-pushed from 0c3804b to acf5107.
@@ -14225,23 +14259,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
      return DAG.getConstant(-1, DL, VT);
    return DAG.getConstant(0, DL, VT);
  }
case Intrinsic::riscv_vloxei:
To draw attention here: I removed the previous transform over the RISC-V-specific nodes. I could leave it in if desired; I'm just not sure that's worthwhile. The result of removing it is some test regressions in CodeGen/RISCV/rvv/narrow-shift-extend.ll, but I'm not sure whether those regressions "matter" or whether this is synthetic test coverage. Reverting this piece of the change is simple, so I'm happy to go with reviewer preference here.
SDLoc DL(N);

// In general, what we're doing here is seeing if we can sink a truncate to
// a smaller element type into the expresson tree building our index.
expresson -> expression
// zero-extended their indices, \p narrowIndex tries to narrow the type of index
// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
// bits(ty).
/// According to the property that indexed load/store instructions zero-extended
zero-extended -> zero extend
if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) {
  KnownBits Known = DAG.computeKnownBits(N);
  unsigned ActiveBits = Known.countMaxActiveBits();
  EVT ResultVT = VT.getVectorElementType();
Maybe something like:

EVT ResultVT = EVT::getIntegerVT(Context, ActiveBits).getRoundIntegerType(Context);

i.e., derive the narrowed element type from the number of active bits in the constant indices, rounded up to the next power-of-two width, rather than reusing the data VT's element type?
The first 3 patches LGTM.
Force-pushed from 85b4bf4 to c394bfd.
Since GitHub makes this really hard to follow, let me spell out that the three changes approved by @topperc above were landed as the following:
I have not yet addressed the changes requested for the final change, and will do so shortly.
Doing so allows the use of smaller constants overall, and may allow (for some small vector constants) avoiding the constant pool entirely. As seen in some of the tests, this can result in extra VTYPE toggles if we get unlucky. We could reasonably restrict this to > LMUL1 types, but I think it's also reasonable not to. What do reviewers think?
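To illustrate what this buys (a hand-written example, not one of the tests in the patch): when the index operand is a constant build_vector, known-bits analysis proves every element fits in a narrow type, so the index vector can be rebuilt with small elements instead of being materialized as a full 64-bit-element constant.

```llvm
; Illustrative only. All index constants fit in 8 bits, so the combine can
; rebuild the index vector with narrow elements; the gather then uses a
; small index element type rather than an <8 x i64> constant that would
; otherwise typically be loaded from the constant pool on RV64.
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)

define <8 x i32> @gather_constant_indices(ptr %base, <8 x i1> %m, <8 x i32> %passthru) {
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i64> <i64 0, i64 3, i64 5, i64 9, i64 14, i64 22, i64 37, i64 63>
  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
```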
Force-pushed from c394bfd to bb20d10.
@topperc ping on the last patch
LGTM
Doing so allows the use of smaller constants overall, and may allow (for some small vector constants) avoiding the constant pool entirely. This can result in extra VTYPE toggles if we get unlucky. This was reviewed under PR #66405.
The last change under this PR was landed as e52c558.
Doing so allows the use of smaller constants overall, and may allow (for some small vector constants) avoiding the constant pool entirely. This can result in extra VTYPE toggles if we get unlucky. This was reviewed under PR llvm#66405.
Review-wise, I'm trying something different. This PR is what would have been four stacked changes in Phabricator. Having different PRs (and thus duplicated commits) seemed like a major headache, so I'm posting all four changes in one PR. I will land them individually after review. Feel free to LGTM individual changes (please be clear on which), and I will rebase as appropriate.