[RISCV] Keep same SEW/LMUL ratio if possible in forward transfer #69788

Merged · 4 commits · Oct 27, 2023
7 changes: 4 additions & 3 deletions llvm/lib/Target/RISCV/MCA/RISCVCustomBehaviour.cpp
@@ -212,9 +212,10 @@ getEEWAndEMULForUnitStrideLoadStore(unsigned Opcode, RISCVII::VLMUL LMUL,
llvm_unreachable("Opcode is not a vector unit stride load nor store");
}

uint8_t EMUL =
static_cast<uint8_t>(RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW));
return std::make_pair(EEW, EMUL);
auto EMUL = RISCVVType::getSameRatioLMUL(SEW, LMUL, EEW);
if (!EMUL)
llvm_unreachable("Invalid SEW or LMUL for new ratio");
return std::make_pair(EEW, *EMUL);
}

unsigned RISCVInstrumentManager::getSchedClassID(
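To make the pairing above concrete (a worked example of mine, not from the patch): a unit-stride vle8.v has EEW = 8; if it executes under vtype e16, m2, the SEW/LMUL ratio is 16/2 = 8, so the ratio-preserving EMUL is 8/8 = m1 and getSameRatioLMUL succeeds. By contrast, EEW = 64 under e8, m4 (ratio 2) would need EMUL = 32, which no vtype encodes; getSameRatioLMUL now returns std::nullopt for that case, and this MCA caller turns it into an explicit llvm_unreachable instead of encoding a bogus LMUL.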
6 changes: 4 additions & 2 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.cpp
@@ -206,12 +206,14 @@ unsigned RISCVVType::getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul) {
return (SEW * 8) / LMul;
}

RISCVII::VLMUL RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
unsigned EEW) {
std::optional<RISCVII::VLMUL>
RISCVVType::getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW) {
unsigned Ratio = RISCVVType::getSEWLMULRatio(SEW, VLMUL);
unsigned EMULFixedPoint = (EEW * 8) / Ratio;
bool Fractional = EMULFixedPoint < 8;
unsigned EMUL = Fractional ? 8 / EMULFixedPoint : EMULFixedPoint / 8;
if (!isValidLMUL(EMUL, Fractional))
return std::nullopt;
return RISCVVType::encodeLMUL(EMUL, Fractional);
}
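As a standalone illustration of the fixed-point math above, here is a minimal sketch (my own reimplementation, not the LLVM sources: the LMul pair type, the helper names, and the simplified validity check are all invented for the sketch, and it assumes inputs that keep the fixed point nonzero):

// Minimal illustration of getSameRatioLMUL's arithmetic, using a
// {Fractional, Value} pair in place of RISCVII::VLMUL.
#include <cassert>
#include <optional>
#include <utility>

using LMul = std::pair<bool, unsigned>; // {Fractional, Value}

unsigned sewLMulRatio(unsigned SEW, LMul L) {
  // LMUL expressed in eighths so fractional values stay integral.
  unsigned Eighths = L.first ? 8 / L.second : L.second * 8;
  return (SEW * 8) / Eighths; // == SEW / LMUL
}

std::optional<LMul> sameRatioLMul(unsigned SEW, LMul L, unsigned EEW) {
  unsigned Ratio = sewLMulRatio(SEW, L);
  unsigned FixedPoint = (EEW * 8) / Ratio; // EMUL in eighths (assumed > 0)
  bool Fractional = FixedPoint < 8;
  unsigned EMul = Fractional ? 8 / FixedPoint : FixedPoint / 8;
  if (EMul != 1 && EMul != 2 && EMul != 4 && EMul != 8)
    return std::nullopt; // no representable EMUL for this ratio
  return LMul{Fractional, EMul};
}

int main() {
  // SEW=16 with LMUL=m1 gives ratio 16; EEW=8 keeps it with EMUL=mf2.
  assert((sameRatioLMul(16, {false, 1}, 8) == LMul{true, 2}));
  // SEW=8 with LMUL=m4 gives ratio 2; EEW=64 would need EMUL=32 -> nullopt.
  assert(!sameRatioLMul(8, {false, 4}, 64));
}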

4 changes: 2 additions & 2 deletions llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h
@@ -535,8 +535,8 @@ void printVType(unsigned VType, raw_ostream &OS);

unsigned getSEWLMULRatio(unsigned SEW, RISCVII::VLMUL VLMul);

RISCVII::VLMUL getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL,
unsigned EEW);
std::optional<RISCVII::VLMUL>
getSameRatioLMUL(unsigned SEW, RISCVII::VLMUL VLMUL, unsigned EEW);
} // namespace RISCVVType

namespace RISCVRVC {
13 changes: 13 additions & 0 deletions llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -539,6 +539,8 @@ class VSETVLIInfo {
MaskAgnostic = MA;
}

void setVLMul(RISCVII::VLMUL VLMul) { this->VLMul = VLMul; }

unsigned encodeVTYPE() const {
assert(isValid() && !isUnknown() && !SEWLMULRatioOnly &&
"Can't encode VTYPE for uninitialized or unknown");
@@ -1038,6 +1040,17 @@ void RISCVInsertVSETVLI::transferBefore(VSETVLIInfo &Info,
if (!RISCVII::hasVLOp(TSFlags))
return;

// If we don't use LMUL or the SEW/LMUL ratio, then adjust LMUL so that we
// maintain the SEW/LMUL ratio. This allows us to eliminate VL toggles in more
// places.
DemandedFields Demanded = getDemanded(MI, MRI, ST);
if (!Demanded.LMUL && !Demanded.SEWLMULRatio && Info.isValid() &&
    PrevInfo.isValid() && !Info.isUnknown() && !PrevInfo.isUnknown()) {
  if (auto NewVLMul = RISCVVType::getSameRatioLMUL(
          PrevInfo.getSEW(), PrevInfo.getVLMUL(), Info.getSEW()))
    Info.setVLMul(*NewVLMul);
}

[Review comment from a collaborator on the condition above:] I mentioned this offline, but basing this off the existing demanded fields is much better than re-inferring them, which is what I'd tried. This makes corner-case bugs around instructions (e.g. vrgather or vslide*) much less likely, because the existing getDemanded logic would have to be wrong as well. I feel a bit silly in retrospect, but glad you got it structured properly. :)

// For vmv.s.x and vfmv.s.f, there are only two behaviors, VL = 0 and
// VL > 0. We can discard the user requested AVL and just use the last
// one if we can prove it equally zero. This removes a vsetvli entirely
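The test updates that follow all show the same effect. Here is a before/after sketch (the instruction sequence is illustrative, modeled on the buildvec_seq_v9i8 diff below, not taken verbatim from any test): a vmv.s.x that builds a mask demands only SEW, so the pass may pick any LMUL for it, and choosing one that keeps the previous SEW/LMUL ratio makes VL provably unchanged, letting both transitions use the VL-preserving form.

# Before: vmv.s.x was given e16/mf4 (ratio 64), breaking the e8/m1
# ratio of 8, so VL had to be re-established with an immediate AVL.
vsetivli zero, 16, e8, m1, ta, ma
vmerge.vim v8, v8, 1, v0
vsetivli zero, 1, e16, mf4, ta, ma
vmv.s.x v0, a1
vsetivli zero, 16, e8, m1, ta, ma
vmerge.vim v8, v8, 2, v0

# After: since vmv.s.x does not care about LMUL, choose e16/m2, which
# keeps ratio 8. VL is then known to be unchanged, so both transitions
# can use the VL-preserving "vsetvli zero, zero" form.
vsetivli zero, 16, e8, m1, ta, ma
vmerge.vim v8, v8, 1, v0
vsetvli zero, zero, e16, m2, ta, ma
vmv.s.x v0, a1
vsetvli zero, zero, e8, m1, ta, ma
vmerge.vim v8, v8, 2, v0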
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/RISCV/double_reduct.ll
@@ -90,7 +90,7 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vwaddu.vv v10, v8, v9
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v8, zero
; CHECK-NEXT: vredsum.vs v8, v10, v8
; CHECK-NEXT: vmv.x.s a0, v8
@@ -22,9 +22,9 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vadd.vi v12, v11, -16
; CHECK-NEXT: li a0, -256
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a0
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, mu
; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
; CHECK-NEXT: vmsne.vi v9, v9, 0
; CHECK-NEXT: vadd.vi v12, v11, 1
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -567,9 +567,9 @@ define void @buildvec_seq_v9i8(ptr %x) {
; CHECK-NEXT: vmv.v.i v8, 3
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: li a1, 146
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
; CHECK-NEXT: vse8.v v8, (a0)
108 changes: 53 additions & 55 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1101,21 +1101,20 @@ define void @urem_v2i64(ptr %x, ptr %y) {
define void @mulhu_v16i8(ptr %x) {
; CHECK-LABEL: mulhu_v16i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: lui a1, 3
; CHECK-NEXT: addi a1, a1, -2044
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 0
; CHECK-NEXT: li a1, -128
; CHECK-NEXT: vmerge.vxm v10, v9, a1, v0
; CHECK-NEXT: lui a1, 1
; CHECK-NEXT: addi a2, a1, 32
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: lui a2, %hi(.LCPI65_0)
; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0)
; CHECK-NEXT: vle8.v v11, (a2)
@@ -1126,21 +1125,21 @@ define void @mulhu_v16i8(ptr %x) {
; CHECK-NEXT: vmulhu.vv v8, v8, v10
; CHECK-NEXT: vadd.vv v8, v8, v9
; CHECK-NEXT: li a2, 513
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a2
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmv.v.i v9, 4
; CHECK-NEXT: vmerge.vim v9, v9, 1, v0
; CHECK-NEXT: addi a1, a1, 78
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 3, v0
; CHECK-NEXT: lui a1, 8
; CHECK-NEXT: addi a1, a1, 304
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vim v9, v9, 2, v0
; CHECK-NEXT: vsrl.vv v8, v8, v9
; CHECK-NEXT: vse8.v v8, (a0)
@@ -1204,9 +1203,9 @@ define void @mulhu_v6i16(ptr %x) {
; CHECK-NEXT: vdivu.vv v9, v10, v9
; CHECK-NEXT: lui a1, 45217
; CHECK-NEXT: addi a1, a1, -1785
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdivu.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -1309,10 +1308,10 @@ define void @mulhs_v16i8(ptr %x) {
; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: lui a1, 5
; CHECK-NEXT: addi a1, a1, -1452
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; CHECK-NEXT: vmv.s.x v0, a1
; CHECK-NEXT: li a1, 57
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; CHECK-NEXT: vmerge.vxm v9, v9, a1, v0
; CHECK-NEXT: vmulhu.vv v8, v8, v9
; CHECK-NEXT: vmv.v.i v9, 7
@@ -1367,9 +1366,9 @@ define void @mulhs_v6i16(ptr %x) {
; CHECK-NEXT: vdiv.vv v9, v9, v10
; CHECK-NEXT: lui a1, 1020016
; CHECK-NEXT: addi a1, a1, 2041
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v10, a1
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; CHECK-NEXT: vsext.vf2 v11, v10
; CHECK-NEXT: vdiv.vv v8, v8, v11
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
@@ -4872,45 +4871,45 @@ define void @mulhu_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 0
; LMULMAX2-NEXT: lui a2, 163907
; LMULMAX2-NEXT: addi a2, a2, -2044
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: li a2, -128
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vxm v12, v10, a2, v0
; LMULMAX2-NEXT: lui a2, 66049
; LMULMAX2-NEXT: addi a2, a2, 32
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a2, %hi(.LCPI181_0)
; LMULMAX2-NEXT: addi a2, a2, %lo(.LCPI181_0)
; LMULMAX2-NEXT: vle8.v v14, (a2)
; LMULMAX2-NEXT: lui a1, 163907
; LMULMAX2-NEXT: addi a1, a1, -2044
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: li a1, -128
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vxm v12, v10, a1, v0
; LMULMAX2-NEXT: lui a1, 66049
; LMULMAX2-NEXT: addi a1, a1, 32
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, %hi(.LCPI181_0)
; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI181_0)
; LMULMAX2-NEXT: vle8.v v14, (a1)
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: vsrl.vv v10, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v10, v10, v14
; LMULMAX2-NEXT: vsub.vv v8, v8, v10
; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12
; LMULMAX2-NEXT: vadd.vv v8, v8, v10
; LMULMAX2-NEXT: vmv.v.i v10, 4
; LMULMAX2-NEXT: lui a2, 8208
; LMULMAX2-NEXT: addi a2, a2, 513
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 8208
; LMULMAX2-NEXT: addi a1, a1, 513
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: lui a2, 66785
; LMULMAX2-NEXT: addi a2, a2, 78
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 66785
; LMULMAX2-NEXT: addi a1, a1, 78
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 3, v0
; LMULMAX2-NEXT: lui a2, 529160
; LMULMAX2-NEXT: addi a2, a2, 304
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 529160
; LMULMAX2-NEXT: addi a1, a1, 304
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 2, v0
; LMULMAX2-NEXT: vsrl.vv v8, v8, v10
; LMULMAX2-NEXT: vse8.v v8, (a0)
@@ -5250,11 +5249,11 @@ define void @mulhs_v32i8(ptr %x) {
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: vle8.v v8, (a0)
; LMULMAX2-NEXT: vmv.v.i v10, 7
; LMULMAX2-NEXT: lui a2, 304453
; LMULMAX2-NEXT: addi a2, a2, -1452
; LMULMAX2-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a2
; LMULMAX2-NEXT: vsetvli zero, a1, e8, m2, ta, ma
; LMULMAX2-NEXT: lui a1, 304453
; LMULMAX2-NEXT: addi a1, a1, -1452
; LMULMAX2-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; LMULMAX2-NEXT: vmv.s.x v0, a1
; LMULMAX2-NEXT: vsetvli zero, zero, e8, m2, ta, ma
; LMULMAX2-NEXT: vmerge.vim v10, v10, 1, v0
; LMULMAX2-NEXT: li a1, -123
; LMULMAX2-NEXT: vmv.v.x v12, a1
@@ -5267,15 +5266,14 @@ define void @mulhs_v32i8(ptr %x) {
;
; LMULMAX1-LABEL: mulhs_v32i8:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vsetivli zero, 16, e16, m2, ta, ma
; LMULMAX1-NEXT: vle8.v v8, (a0)
; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vle8.v v9, (a1)
; LMULMAX1-NEXT: lui a2, 5
; LMULMAX1-NEXT: addi a2, a2, -1452
; LMULMAX1-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; LMULMAX1-NEXT: vmv.s.x v0, a2
; LMULMAX1-NEXT: vsetivli zero, 16, e8, m1, ta, ma
; LMULMAX1-NEXT: vsetvli zero, zero, e8, m1, ta, ma
; LMULMAX1-NEXT: vmv.v.i v10, -9
; LMULMAX1-NEXT: vmerge.vim v10, v10, 9, v0
; LMULMAX1-NEXT: vdivu.vv v9, v9, v10