Skip to content

Commit

Permalink
[VPlan] Model FOR resume value extraction in VPlan. (llvm#93396)
Browse files Browse the repository at this point in the history
This patch uses the ExtractFromEnd VPInstruction opcode
to extract the value of a FOR to be used as resume value for the phi in
the scalar loop.

It adds a new live-out that temporarily wraps the FOR phi in the scalar
loop. fixFixedOrderRecurrence will process live outs for fixed order
recurrence phis by creating a new phi node in the scalar preheader, 
using the generated value for the live-out as incoming value from the
middle block and the original start value as incoming value for the
other edge. Creation of the phi in the preheader, as well as updating
the phi in the scalar loop will also be moved to VPlan in the future,
eventually retiring fixFixedOrderRecurrence.

Depends on llvm#93395

PR: llvm#93396
  • Loading branch information
fhahn authored Jun 5, 2024
1 parent e635520 commit 05e1b53
Show file tree
Hide file tree
Showing 19 changed files with 206 additions and 152 deletions.
119 changes: 33 additions & 86 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include "VPlan.h"
#include "VPlanAnalysis.h"
#include "VPlanHCFGBuilder.h"
#include "VPlanPatternMatch.h"
#include "VPlanTransforms.h"
#include "VPlanVerifier.h"
#include "llvm/ADT/APInt.h"
Expand Down Expand Up @@ -606,10 +607,9 @@ class InnerLoopVectorizer {
BasicBlock *MiddleBlock, BasicBlock *VectorHeader,
VPlan &Plan, VPTransformState &State);

/// Create the exit value of first order recurrences in the middle block and
/// update their users.
void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR,
VPTransformState &State);
/// Create the phi node for the resume value of first order recurrences in the
/// scalar preheader and update the users in the scalar loop.
void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State);

/// Iteratively sink the scalarized operands of a predicated instruction into
/// the block that was created for it.
Expand Down Expand Up @@ -3391,16 +3391,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
fixNonInductionPHIs(Plan, State);

// At this point every instruction in the original loop is widened to a
// vector form. Now we need to fix the recurrences in the loop. These PHI
// nodes are currently empty because we did not want to introduce cycles.
// This is the second stage of vectorizing recurrences. Note that fixing
// reduction phis are already modeled in VPlan.
// TODO: Also model fixing fixed-order recurrence phis in VPlan.
VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock();
for (VPRecipeBase &R : HeaderVPBB->phis()) {
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R))
fixFixedOrderRecurrence(FOR, State);
// vector form. Note that fixing reduction phis, as well as extracting the
// exit and resume values for fixed-order recurrences are already modeled in
// VPlan. All that remains to do here is to create a phi in the scalar
// pre-header for each fixed-order recurrence resume value.
// TODO: Also model creating phis in the scalar pre-header in VPlan.
for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) {
if (!Legal->isFixedOrderRecurrence(LO->getPhi()))
continue;
fixFixedOrderRecurrence(LO, State);
Plan.removeLiveOut(LO->getPhi());
}

// Forget the original basic block.
Expand All @@ -3416,6 +3416,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
for (PHINode &PN : Exit->phis())
PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN);

VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion();
VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock();
Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]);
if (Cost->requiresScalarEpilogue(VF.isVector())) {
Expand Down Expand Up @@ -3469,85 +3470,31 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State,
VF.getKnownMinValue() * UF);
}

void InnerLoopVectorizer::fixFixedOrderRecurrence(
VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) {
// This is the second phase of vectorizing first-order recurrences. An
// overview of the transformation is described below. Suppose we have the
// following loop.
//
// for (int i = 0; i < n; ++i)
// b[i] = a[i] - a[i - 1];
//
// There is a first-order recurrence on "a". For this loop, the shorthand
// scalar IR looks like:
//
// scalar.ph:
// s_init = a[-1]
// br scalar.body
//
// scalar.body:
// i = phi [0, scalar.ph], [i+1, scalar.body]
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
// s2 = a[i]
// b[i] = s2 - s1
// br cond, scalar.body, ...
//
// In this example, s1 is a recurrence because it's value depends on the
// previous iteration. In the first phase of vectorization, we created a
// vector phi v1 for s1. We now complete the vectorization and produce the
// shorthand vector IR shown below (for VF = 4, UF = 1).
//
// vector.ph:
// v_init = vector(..., ..., ..., a[-1])
// br vector.body
//
// vector.body
// i = phi [0, vector.ph], [i+4, vector.body]
// v1 = phi [v_init, vector.ph], [v2, vector.body]
// v2 = a[i, i+1, i+2, i+3];
// v3 = vector(v1(3), v2(0, 1, 2))
// b[i, i+1, i+2, i+3] = v2 - v3
// br cond, vector.body, middle.block
//
// middle.block:
// x = v2(3)
// br scalar.ph
//
// scalar.ph:
// s_init = phi [x, middle.block], [a[-1], otherwise]
// br scalar.body
//
// After execution completes the vector loop, we extract the next value of
// the recurrence (x) to use as the initial value in the scalar loop.

void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO,
VPTransformState &State) {
// Extract the last vector element in the middle block. This will be the
// initial value for the recurrence when jumping to the scalar loop.
VPValue *PreviousDef = PhiR->getBackedgeValue();
Value *Incoming = State.get(PreviousDef, UF - 1);
auto *ExtractForScalar = Incoming;
auto *IdxTy = Builder.getInt32Ty();
Value *RuntimeVF = nullptr;
if (VF.isVector()) {
auto *One = ConstantInt::get(IdxTy, 1);
Builder.SetInsertPoint(LoopMiddleBlock->getTerminator());
RuntimeVF = getRuntimeVF(Builder, IdxTy, VF);
auto *LastIdx = Builder.CreateSub(RuntimeVF, One);
ExtractForScalar =
Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract");
}
VPValue *VPExtract = LO->getOperand(0);
using namespace llvm::VPlanPatternMatch;
assert(match(VPExtract, m_VPInstruction<VPInstruction::ExtractFromEnd>(
m_VPValue(), m_VPValue())) &&
"FOR LiveOut expects to use an extract from end.");
Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true);

// Fix the initial value of the original recurrence in the scalar loop.
PHINode *ScalarHeaderPhi = LO->getPhi();
auto *InitScalarFOR =
ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader);
Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin());
PHINode *Phi = cast<PHINode>(PhiR->getUnderlyingValue());
auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init");
auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue();
auto *ScalarPreheaderPhi =
Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init");
for (auto *BB : predecessors(LoopScalarPreHeader)) {
auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit;
Start->addIncoming(Incoming, BB);
auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR;
ScalarPreheaderPhi->addIncoming(Incoming, BB);
}

Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start);
Phi->setName("scalar.recur");
ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader,
ScalarPreheaderPhi);
ScalarHeaderPhi->setName("scalar.recur");
}

void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
Expand Down
4 changes: 3 additions & 1 deletion llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -3166,7 +3166,9 @@ class VPlan {
/// definitions are VPValues that hold a pointer to their underlying IR.
SmallVector<VPValue *, 16> VPLiveInsToFree;

/// Values used outside the plan.
/// Values used outside the plan. It contains live-outs that need fixing. Any
/// live-out that is fixed outside VPlan needs to be removed. The remaining
/// live-outs are fixed via VPLiveOut::fixPhi.
MapVector<PHINode *, VPLiveOut *> LiveOuts;

/// Mapping from SCEVs to the VPValues representing their expansions.
Expand Down
81 changes: 79 additions & 2 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -847,14 +847,91 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan,
// all users.
RecurSplice->setOperand(0, FOR);

// This is the second phase of vectorizing first-order recurrences. An
// overview of the transformation is described below. Suppose we have the
// following loop with some use after the loop of the last a[i-1],
//
// for (int i = 0; i < n; ++i) {
// t = a[i - 1];
// b[i] = a[i] - t;
// }
// use t;
//
// There is a first-order recurrence on "a". For this loop, the shorthand
// scalar IR looks like:
//
// scalar.ph:
// s_init = a[-1]
// br scalar.body
//
// scalar.body:
// i = phi [0, scalar.ph], [i+1, scalar.body]
// s1 = phi [s_init, scalar.ph], [s2, scalar.body]
// s2 = a[i]
// b[i] = s2 - s1
// br cond, scalar.body, exit.block
//
// exit.block:
// use = lcssa.phi [s1, scalar.body]
//
// In this example, s1 is a recurrence because its value depends on the
// previous iteration. In the first phase of vectorization, we created a
// vector phi v1 for s1. We now complete the vectorization and produce the
// shorthand vector IR shown below (for VF = 4, UF = 1).
//
// vector.ph:
// v_init = vector(..., ..., ..., a[-1])
// br vector.body
//
// vector.body
// i = phi [0, vector.ph], [i+4, vector.body]
// v1 = phi [v_init, vector.ph], [v2, vector.body]
// v2 = a[i, i+1, i+2, i+3];
// v3 = vector(v1(3), v2(0, 1, 2))
// b[i, i+1, i+2, i+3] = v2 - v3
// br cond, vector.body, middle.block
//
// middle.block:
// s_penultimate = v2(2) = v3(3)
// s_resume = v2(3)
// br cond, scalar.ph, exit.block
//
// scalar.ph:
// s_init' = phi [s_resume, middle.block], [s_init, otherwise]
// br scalar.body
//
// scalar.body:
// i = phi [0, scalar.ph], [i+1, scalar.body]
// s1 = phi [s_init', scalar.ph], [s2, scalar.body]
// s2 = a[i]
// b[i] = s2 - s1
// br cond, scalar.body, exit.block
//
// exit.block:
// lo = lcssa.phi [s1, scalar.body], [s_penultimate, middle.block]
//
// After execution completes the vector loop, we extract the next value of
// the recurrence (s_resume) to use as the initial value in the scalar loop.
// This is modeled by ExtractFromEnd.
Type *IntTy = Plan.getCanonicalIV()->getScalarType();
auto *Result = cast<VPInstruction>(MiddleBuilder.createNaryOp(

// Extract the penultimate value of the recurrence and update VPLiveOut
// users of the recurrence splice.
auto *Penultimate = cast<VPInstruction>(MiddleBuilder.createNaryOp(
VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(),
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))},
{}, "vector.recur.extract.for.phi"));
RecurSplice->replaceUsesWithIf(
Result, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });
Penultimate, [](VPUser &U, unsigned) { return isa<VPLiveOut>(&U); });

// Extract the resume value and create a new VPLiveOut for it.
auto *Resume = MiddleBuilder.createNaryOp(
VPInstruction::ExtractFromEnd,
{FOR->getBackedgeValue(),
Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))},
{}, "vector.recur.extract");
Plan.addLiveOut(cast<PHINode>(FOR->getUnderlyingInstr()), Resume);
}
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down Expand Up @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) {
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -786,8 +786,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down Expand Up @@ -871,8 +871,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT: middle.block:
; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or <vscale x 4 x i32> [[TMP58]], [[TMP57]]
; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32(<vscale x 4 x i32> [[BIN_RDX]])
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32()
; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4
; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1
Expand All @@ -109,6 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4
; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1
; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT13:%.*]] = extractelement <vscale x 4 x i32> [[TMP20]], i32 [[TMP66]]
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; DEFAULT: scalar.ph:
; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1509,11 +1509,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 {
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2
; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 4 x i16> [[WIDE_MASKED_GATHER4]], i32 [[TMP32]]
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down Expand Up @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/LoopVectorize/X86/pr72969.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,8 @@ define void @test(ptr %p) {
; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VEC: middle.block:
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; VEC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP28]], i32 3
; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; VEC: scalar.ph:
; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
Expand Down
Loading

0 comments on commit 05e1b53

Please sign in to comment.