Skip to content

Commit

Permalink
[VPlan] Check if only first part is used for all per-part VPInsts.
Browse files Browse the repository at this point in the history
Apply the onlyFirstPartUsed logic generally to all per-part
VPInstructions. Note that the test changes remove the second part
of an unused first-order recurrence splice.
  • Loading branch information
fhahn committed Jun 8, 2024
1 parent 44aecca commit a43d999
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 13 deletions.
11 changes: 8 additions & 3 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -324,9 +324,6 @@ Value *VPInstruction::generatePerPart(VPTransformState &State, unsigned Part) {

if (Instruction::isBinaryOp(getOpcode())) {
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
if (Part != 0 && vputils::onlyFirstPartUsed(this))
return State.get(this, 0, OnlyFirstLaneUsed);

Value *A = State.get(getOperand(0), Part, OnlyFirstLaneUsed);
Value *B = State.get(getOperand(1), Part, OnlyFirstLaneUsed);
auto *Res =
Expand Down Expand Up @@ -628,6 +625,7 @@ void VPInstruction::execute(VPTransformState &State) {
canGenerateScalarForFirstLane() &&
(vputils::onlyFirstLaneUsed(this) || isVectorToScalar());
bool GeneratesPerAllLanes = doesGeneratePerAllLanes();
bool OnlyFirstPartUsed = vputils::onlyFirstPartUsed(this);
for (unsigned Part = 0; Part < State.UF; ++Part) {
if (GeneratesPerAllLanes) {
for (unsigned Lane = 0, NumLanes = State.VF.getKnownMinValue();
Expand All @@ -639,6 +637,13 @@ void VPInstruction::execute(VPTransformState &State) {
continue;
}

if (Part != 0 && OnlyFirstPartUsed && hasResult()) {
Value *Part0 = State.get(this, 0, /*IsScalar*/ GeneratesPerFirstLaneOnly);
State.set(this, Part0, Part,
/*IsScalar*/ GeneratesPerFirstLaneOnly);
continue;
}

Value *GeneratedValue = generatePerPart(State, Part);
if (!hasResult())
continue;
Expand Down
2 changes: 0 additions & 2 deletions llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
; CHECK-NEXT: [[TMP8:%.*]] = zext <2 x i32> [[WIDE_LOAD]] to <2 x i64>
; CHECK-NEXT: [[TMP9]] = zext <2 x i32> [[WIDE_LOAD4]] to <2 x i64>
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i64> [[VECTOR_RECUR]], <2 x i64> [[TMP8]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i64> [[TMP8]], <2 x i64> [[TMP9]], <2 x i32> <i32 1, i32 2>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
Expand Down Expand Up @@ -186,7 +185,6 @@ define i64 @int_and_pointer_iv(ptr %start, i32 %N) {
; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP5]] = zext <4 x i32> [[WIDE_LOAD3]] to <4 x i64>
; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,6 @@ define i64 @test_pr62954_scalar_epilogue_required(ptr %A, ptr noalias %B, ptr %C
; CHECK-NEXT: [[TMP0:%.*]] = sub nsw <16 x i64> zeroinitializer, [[VEC_IND]]
; CHECK-NEXT: [[TMP1]] = sub nsw <16 x i64> zeroinitializer, [[STEP_ADD]]
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i64> [[VECTOR_RECUR]], <16 x i64> [[TMP0]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <16 x i64> [[TMP0]], <16 x i64> [[TMP1]], <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i64> [[TMP1]], i32 15
; CHECK-NEXT: store i64 [[TMP4]], ptr [[GEP]], align 8
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
Expand Down
6 changes: 0 additions & 6 deletions llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -912,7 +912,6 @@ define i32 @PR27246() {
; UNROLL-NO-IC-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 -4, i32 -4, i32 -4, i32 -4>
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
Expand Down Expand Up @@ -1121,7 +1120,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) {
; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2
; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3
; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
Expand Down Expand Up @@ -1393,7 +1391,6 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x) {
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP1]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
Expand Down Expand Up @@ -2572,7 +2569,6 @@ define void @sink_dead_inst(ptr %a) {
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = zext <4 x i16> [[TMP2]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP5]] = zext <4 x i16> [[TMP3]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = add <4 x i16> [[TMP2]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP9]] = add <4 x i16> [[TMP3]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
Expand Down Expand Up @@ -3491,7 +3487,6 @@ define i32 @sink_after_dead_inst(ptr %A.ptr) {
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP7]] = zext <4 x i16> [[TMP5]] to <4 x i32>
; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[TMP0]]
; UNROLL-NO-IC-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
; UNROLL-NO-IC-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[TMP10]], i32 0
Expand Down Expand Up @@ -3665,7 +3660,6 @@ define void @unused_recurrence(ptr %a) {
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP3]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[TMP5:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
; UNROLL-NO-IC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,6 @@ define i64 @constant_folded_previous_value() {
; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VECTOR_RECUR:.*]] = phi <vscale x 4 x i64> [ %vector.recur.init, %vector.ph ], [ shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), %vector.body ]
; CHECK-VF4UF2: %[[SPLICE1:.*]] = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> %vector.recur, <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
; CHECK-VF4UF2: %[[SPLICE2:.*]] = call <vscale x 4 x i64> @llvm.vector.splice.nxv4i64(<vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i64> shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer), i32 -1)
; CHECK-VF4UF2: br i1 {{.*}}, label %middle.block, label %vector.body
entry:
br label %scalar.body
Expand Down

0 comments on commit a43d999

Please sign in to comment.