-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV] Fix v[f]slide1down.vx having VL changed #106110
[RISCV] Fix v[f]slide1down.vx having VL changed #106110
Conversation
v[f]slide1down.vx uses VL to determine where the element is inserted into, so changing the VL changes the result. This fixes this by setting ActiveElementsAffectsResult, but it's overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL. Fixes llvm#106109
@llvm/pr-subscribers-backend-risc-v Author: Luke Lau (lukel97) Changesv[f]slide1down.vx uses VL to determine where the element is inserted into, so changing the VL changes the result. This fixes this by setting ActiveElementsAffectsResult, but it's overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL. Fixes #106109 Full diff: https://github.com/llvm/llvm-project/pull/106110.diff 3 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index a84e92b0fda262..878859fc1d0864 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1665,6 +1665,7 @@ defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, /*slidesUp=*/true>;
defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>;
+let ActiveElementsAffectResult = 1 in
defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
} // Predicates = [HasVInstructions]
@@ -1672,6 +1673,7 @@ let Predicates = [HasVInstructionsAnyF] in {
let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
} // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
+let ActiveElementsAffectResult = 1 in
defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
} // Predicates = [HasVInstructionsAnyF]
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr106109.ll b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
new file mode 100644
index 00000000000000..683753e19f2d5a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
@@ -0,0 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %false, float %1, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(<vscale x 4 x float> undef, <vscale x 4 x float> %0, float %1, i64 4)
+ %b = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> %false, <vscale x 4 x float> %a, <vscale x 4 x i1> %mask, i64 1)
+ ret <vscale x 4 x float> %b
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 6700920cebff0a..2ff775d6d14f43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -776,12 +776,15 @@ define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vsc
ret <vscale x 2 x i32> %b
}
+; FIXME: We can still fold this given that the vmerge and the vslide1down have
+; the same vl.
declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
; CHECK-LABEL: vpselect_vslide1down:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT: vslide1down.vx v8, v9, a0, v0.t
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v9, v9, a0
+; CHECK-NEXT: vmerge.vvm v8, v8, v9, v0
; CHECK-NEXT: ret
%1 = zext i32 %vl to i64
%a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
In llvm#106110 we had to mark v[f]slide1down.vx as ActiveElementsAffectResult since the elements in the body depend on VL. However it doesn't depend on the mask, so this was overly conservative and broke the vmerge peephole. We can recover this by splitting up ActiveElementsAffectResult into VL and Mask bits, so we can more accurately model v[f]slide1down.vx and re-enable the peephole. I found it quite difficult to find a new name for the flags, suggestions are welcome.
v[f]slide1down.vx uses VL to determine where the element is inserted into, so changing the VL changes the result. This fixes this by setting ActiveElementsAffectsResult, but it's overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL. Fixes llvm#106109
…106517) In #106110 we had to mark v[f]slide1down.vx as ActiveElementsAffectResult since the elements in the body depend on VL. However it doesn't depend on the mask, so this was overly conservative and broke the vmerge peephole. We can recover this by splitting up ActiveElementsAffectResult into VL and Mask bits, so we can more accurately model v[f]slide1down.vx and re-enable the peephole.
v[f]slide1down.vx uses VL to determine where the element is inserted into, so changing the VL changes the result.
This fixes this by setting ActiveElementsAffectsResult, but it's overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL.
Fixes #106109