diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 188bfc164f30a2..c7c19ef456c7cb 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -59,6 +59,7 @@ #include "VPlan.h" #include "VPlanAnalysis.h" #include "VPlanHCFGBuilder.h" +#include "VPlanPatternMatch.h" #include "VPlanTransforms.h" #include "VPlanVerifier.h" #include "llvm/ADT/APInt.h" @@ -606,10 +607,9 @@ class InnerLoopVectorizer { BasicBlock *MiddleBlock, BasicBlock *VectorHeader, VPlan &Plan, VPTransformState &State); - /// Create the exit value of first order recurrences in the middle block and - /// update their users. - void fixFixedOrderRecurrence(VPFirstOrderRecurrencePHIRecipe *PhiR, - VPTransformState &State); + /// Create the phi node for the resume value of first order recurrences in the + /// scalar preheader and update the users in the scalar loop. + void fixFixedOrderRecurrence(VPLiveOut *LO, VPTransformState &State); /// Iteratively sink the scalarized operands of a predicated instruction into /// the block that was created for it. @@ -3391,16 +3391,16 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, fixNonInductionPHIs(Plan, State); // At this point every instruction in the original loop is widened to a - // vector form. Now we need to fix the recurrences in the loop. These PHI - // nodes are currently empty because we did not want to introduce cycles. - // This is the second stage of vectorizing recurrences. Note that fixing - // reduction phis are already modeled in VPlan. - // TODO: Also model fixing fixed-order recurrence phis in VPlan. - VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); - VPBasicBlock *HeaderVPBB = VectorRegion->getEntryBasicBlock(); - for (VPRecipeBase &R : HeaderVPBB->phis()) { - if (auto *FOR = dyn_cast(&R)) - fixFixedOrderRecurrence(FOR, State); + // vector form. Note that fixing reduction phis, as well as extracting the + // exit and resume values for fixed-order recurrences are already modeled in + // VPlan. All that remains to do here is to create a phi in the scalar + // pre-header for each fixed-order recurrence resume value. + // TODO: Also model creating phis in the scalar pre-header in VPlan. + for (const auto &[_, LO] : to_vector(Plan.getLiveOuts())) { + if (!Legal->isFixedOrderRecurrence(LO->getPhi())) + continue; + fixFixedOrderRecurrence(LO, State); + Plan.removeLiveOut(LO->getPhi()); } // Forget the original basic block. @@ -3416,6 +3416,7 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, for (PHINode &PN : Exit->phis()) PSE.getSE()->forgetLcssaPhiWithNewPredecessor(OrigLoop, &PN); + VPRegionBlock *VectorRegion = State.Plan->getVectorLoopRegion(); VPBasicBlock *LatchVPBB = VectorRegion->getExitingBasicBlock(); Loop *VectorLoop = LI->getLoopFor(State.CFG.VPBB2IRBB[LatchVPBB]); if (Cost->requiresScalarEpilogue(VF.isVector())) { @@ -3469,85 +3470,31 @@ void InnerLoopVectorizer::fixVectorizedLoop(VPTransformState &State, VF.getKnownMinValue() * UF); } -void InnerLoopVectorizer::fixFixedOrderRecurrence( - VPFirstOrderRecurrencePHIRecipe *PhiR, VPTransformState &State) { - // This is the second phase of vectorizing first-order recurrences. An - // overview of the transformation is described below. Suppose we have the - // following loop. - // - // for (int i = 0; i < n; ++i) - // b[i] = a[i] - a[i - 1]; - // - // There is a first-order recurrence on "a". For this loop, the shorthand - // scalar IR looks like: - // - // scalar.ph: - // s_init = a[-1] - // br scalar.body - // - // scalar.body: - // i = phi [0, scalar.ph], [i+1, scalar.body] - // s1 = phi [s_init, scalar.ph], [s2, scalar.body] - // s2 = a[i] - // b[i] = s2 - s1 - // br cond, scalar.body, ... - // - // In this example, s1 is a recurrence because it's value depends on the - // previous iteration. In the first phase of vectorization, we created a - // vector phi v1 for s1. We now complete the vectorization and produce the - // shorthand vector IR shown below (for VF = 4, UF = 1). - // - // vector.ph: - // v_init = vector(..., ..., ..., a[-1]) - // br vector.body - // - // vector.body - // i = phi [0, vector.ph], [i+4, vector.body] - // v1 = phi [v_init, vector.ph], [v2, vector.body] - // v2 = a[i, i+1, i+2, i+3]; - // v3 = vector(v1(3), v2(0, 1, 2)) - // b[i, i+1, i+2, i+3] = v2 - v3 - // br cond, vector.body, middle.block - // - // middle.block: - // x = v2(3) - // br scalar.ph - // - // scalar.ph: - // s_init = phi [x, middle.block], [a[-1], otherwise] - // br scalar.body - // - // After execution completes the vector loop, we extract the next value of - // the recurrence (x) to use as the initial value in the scalar loop. - +void InnerLoopVectorizer::fixFixedOrderRecurrence(VPLiveOut *LO, + VPTransformState &State) { // Extract the last vector element in the middle block. This will be the // initial value for the recurrence when jumping to the scalar loop. - VPValue *PreviousDef = PhiR->getBackedgeValue(); - Value *Incoming = State.get(PreviousDef, UF - 1); - auto *ExtractForScalar = Incoming; - auto *IdxTy = Builder.getInt32Ty(); - Value *RuntimeVF = nullptr; - if (VF.isVector()) { - auto *One = ConstantInt::get(IdxTy, 1); - Builder.SetInsertPoint(LoopMiddleBlock->getTerminator()); - RuntimeVF = getRuntimeVF(Builder, IdxTy, VF); - auto *LastIdx = Builder.CreateSub(RuntimeVF, One); - ExtractForScalar = - Builder.CreateExtractElement(Incoming, LastIdx, "vector.recur.extract"); - } + VPValue *VPExtract = LO->getOperand(0); + using namespace llvm::VPlanPatternMatch; + assert(match(VPExtract, m_VPInstruction( + m_VPValue(), m_VPValue())) && + "FOR LiveOut expects to use an extract from end."); + Value *ResumeScalarFOR = State.get(VPExtract, UF - 1, true); // Fix the initial value of the original recurrence in the scalar loop. + PHINode *ScalarHeaderPhi = LO->getPhi(); + auto *InitScalarFOR = + ScalarHeaderPhi->getIncomingValueForBlock(LoopScalarPreHeader); Builder.SetInsertPoint(LoopScalarPreHeader, LoopScalarPreHeader->begin()); - PHINode *Phi = cast(PhiR->getUnderlyingValue()); - auto *Start = Builder.CreatePHI(Phi->getType(), 2, "scalar.recur.init"); - auto *ScalarInit = PhiR->getStartValue()->getLiveInIRValue(); + auto *ScalarPreheaderPhi = + Builder.CreatePHI(ScalarHeaderPhi->getType(), 2, "scalar.recur.init"); for (auto *BB : predecessors(LoopScalarPreHeader)) { - auto *Incoming = BB == LoopMiddleBlock ? ExtractForScalar : ScalarInit; - Start->addIncoming(Incoming, BB); + auto *Incoming = BB == LoopMiddleBlock ? ResumeScalarFOR : InitScalarFOR; + ScalarPreheaderPhi->addIncoming(Incoming, BB); } - - Phi->setIncomingValueForBlock(LoopScalarPreHeader, Start); - Phi->setName("scalar.recur"); + ScalarHeaderPhi->setIncomingValueForBlock(LoopScalarPreHeader, + ScalarPreheaderPhi); + ScalarHeaderPhi->setName("scalar.recur"); } void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index bd500728883b3c..943edc3520869f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -3166,7 +3166,9 @@ class VPlan { /// definitions are VPValues that hold a pointer to their underlying IR. SmallVector VPLiveInsToFree; - /// Values used outside the plan. + /// Values used outside the plan. It contains live-outs that need fixing. Any + /// live-out that is fixed outside VPlan needs to be removed. The remaining + /// live-outs are fixed via VPLiveOut::fixPhi. MapVector LiveOuts; /// Mapping from SCEVs to the VPValues representing their expansions. diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index af3cf0ad7af04d..ab3b5cf2b9dabe 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -847,14 +847,91 @@ bool VPlanTransforms::adjustFixedOrderRecurrences(VPlan &Plan, // all users. RecurSplice->setOperand(0, FOR); + // This is the second phase of vectorizing first-order recurrences. An + // overview of the transformation is described below. Suppose we have the + // following loop with some use after the loop of the last a[i-1], + // + // for (int i = 0; i < n; ++i) { + // t = a[i - 1]; + // b[i] = a[i] - t; + // } + // use t; + // + // There is a first-order recurrence on "a". For this loop, the shorthand + // scalar IR looks like: + // + // scalar.ph: + // s_init = a[-1] + // br scalar.body + // + // scalar.body: + // i = phi [0, scalar.ph], [i+1, scalar.body] + // s1 = phi [s_init, scalar.ph], [s2, scalar.body] + // s2 = a[i] + // b[i] = s2 - s1 + // br cond, scalar.body, exit.block + // + // exit.block: + // use = lcssa.phi [s1, scalar.body] + // + // In this example, s1 is a recurrence because it's value depends on the + // previous iteration. In the first phase of vectorization, we created a + // vector phi v1 for s1. We now complete the vectorization and produce the + // shorthand vector IR shown below (for VF = 4, UF = 1). + // + // vector.ph: + // v_init = vector(..., ..., ..., a[-1]) + // br vector.body + // + // vector.body + // i = phi [0, vector.ph], [i+4, vector.body] + // v1 = phi [v_init, vector.ph], [v2, vector.body] + // v2 = a[i, i+1, i+2, i+3]; + // v3 = vector(v1(3), v2(0, 1, 2)) + // b[i, i+1, i+2, i+3] = v2 - v3 + // br cond, vector.body, middle.block + // + // middle.block: + // s_penultimate = v2(2) = v3(3) + // s_resume = v2(3) + // br cond, scalar.ph, exit.block + // + // scalar.ph: + // s_init' = phi [s_resume, middle.block], [s_init, otherwise] + // br scalar.body + // + // scalar.body: + // i = phi [0, scalar.ph], [i+1, scalar.body] + // s1 = phi [s_init', scalar.ph], [s2, scalar.body] + // s2 = a[i] + // b[i] = s2 - s1 + // br cond, scalar.body, exit.block + // + // exit.block: + // lo = lcssa.phi [s1, scalar.body], [s.penultimate, middle.block] + // + // After execution completes the vector loop, we extract the next value of + // the recurrence (x) to use as the initial value in the scalar loop. This + // is modeled by ExtractFromEnd. Type *IntTy = Plan.getCanonicalIV()->getScalarType(); - auto *Result = cast(MiddleBuilder.createNaryOp( + + // Extract the penultimate value of the recurrence and update VPLiveOut + // users of the recurrence splice. + auto *Penultimate = cast(MiddleBuilder.createNaryOp( VPInstruction::ExtractFromEnd, {FOR->getBackedgeValue(), Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 2))}, {}, "vector.recur.extract.for.phi")); RecurSplice->replaceUsesWithIf( - Result, [](VPUser &U, unsigned) { return isa(&U); }); + Penultimate, [](VPUser &U, unsigned) { return isa(&U); }); + + // Extract the resume value and create a new VPLiveOut for it. + auto *Resume = MiddleBuilder.createNaryOp( + VPInstruction::ExtractFromEnd, + {FOR->getBackedgeValue(), + Plan.getOrAddLiveIn(ConstantInt::get(IntTy, 1))}, + {}, "vector.recur.extract"); + Plan.addLiveOut(cast(FOR->getUnderlyingInstr()), Resume); } return true; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll index 33d7a3a3c8ac0d..50ab61cac5b1f3 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fixed-order-recurrence.ll @@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll index 162fb9c802dd2c..32dc2ec1d50a8b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/induction-costs.ll @@ -127,8 +127,8 @@ define i64 @pointer_induction_only(ptr %start, ptr %end) { ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i64> [[TMP9]], i32 0 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i64> [[TMP9]], i32 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll index 718148a67fcc87..b7463032cada8e 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll @@ -786,8 +786,8 @@ define void @add_phifail(ptr noalias nocapture readonly %p, ptr noalias nocaptur ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP4]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -871,8 +871,8 @@ define i8 @add_phifail2(ptr noalias nocapture readonly %p, ptr noalias nocapture ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <16 x i32> [[TMP6]], i32 14 -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i32> [[TMP6]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index c24c1a38177d5c..1353290fa2e28a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -100,7 +100,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT: middle.block: ; DEFAULT-NEXT: [[BIN_RDX:%.*]] = or [[TMP58]], [[TMP57]] ; DEFAULT-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.or.nxv4i32( [[BIN_RDX]]) -; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: [[TMP61:%.*]] = call i32 @llvm.vscale.i32() ; DEFAULT-NEXT: [[TMP62:%.*]] = mul i32 [[TMP61]], 4 ; DEFAULT-NEXT: [[TMP63:%.*]] = sub i32 [[TMP62]], 1 @@ -109,6 +108,7 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; DEFAULT-NEXT: [[TMP65:%.*]] = mul i32 [[TMP64]], 4 ; DEFAULT-NEXT: [[TMP66:%.*]] = sub i32 [[TMP65]], 1 ; DEFAULT-NEXT: [[VECTOR_RECUR_EXTRACT13:%.*]] = extractelement [[TMP20]], i32 [[TMP66]] +; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; DEFAULT: scalar.ph: ; DEFAULT-NEXT: [[SCALAR_RECUR_INIT14:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT13]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll index 1853e551806bcd..3a25ffe26cc035 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll @@ -1509,11 +1509,11 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) #1 { ; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP39:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[TMP30:%.*]] = call i32 @llvm.vscale.i32() ; CHECK-NEXT: [[TMP31:%.*]] = shl nuw nsw i32 [[TMP30]], 2 ; CHECK-NEXT: [[TMP32:%.*]] = add nsw i32 [[TMP31]], -1 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement [[WIDE_MASKED_GATHER4]], i32 [[TMP32]] +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll index 8004563f381656..94575004e364b4 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll @@ -47,8 +47,8 @@ define void @firstorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD1]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -154,10 +154,10 @@ define void @thirdorderrec(ptr nocapture noundef readonly %x, ptr noalias nocapt ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <16 x i8> [[WIDE_LOAD5]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT6:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <16 x i8> [[TMP10]], i32 15 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT10:%.*]] = phi i8 [ [[DOTPRE]], [[FOR_BODY_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT9]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll index 45af544f0304d4..829a91f18e5e20 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr72969.ll @@ -83,8 +83,8 @@ define void @test(ptr %p) { ; VEC-NEXT: [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; VEC: middle.block: -; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP28]], i32 3 +; VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] ; VEC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; VEC: scalar.ph: ; VEC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 1, [[VECTOR_SCEVCHECK]] ], [ 1, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index c04178a1c13e21..5907c58365fa0d 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -33,7 +33,12 @@ define void @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]> ; CHECK-NEXT: } ; entry: @@ -89,7 +94,14 @@ define void @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_2:%.+]]> = extract-from-end vp<[[FOR1_SPLICE]]>, ir<1> +; CHECK-NEXT: EMIT vp<[[RESUME_3:%.+]]> = extract-from-end vp<[[FOR2_SPLICE]]>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> +; CHECK-NEXT: Live-out i16 %for.2 = vp<[[RESUME_2]]> +; CHECK-NEXT: Live-out i16 %for.3 = vp<[[RESUME_3]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll index a1173c6b46a2cb..447f0b0bfee20f 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains.ll @@ -19,8 +19,8 @@ define i16 @test_chained_first_order_recurrences_1(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 ; entry: @@ -62,8 +62,8 @@ define i16 @test_chained_first_order_recurrences_2(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP8]], label %middle.block, label %vector.body, !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: @@ -108,10 +108,10 @@ define i16 @test_chained_first_order_recurrences_3(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: @@ -220,10 +220,10 @@ define i16 @test_chained_first_order_recurrences_3_reordered_1(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: @@ -271,10 +271,10 @@ define i16 @test_chained_first_order_recurrences_3_reordered_2(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 ; entry: @@ -322,10 +322,10 @@ define i16 @test_chained_first_order_recurrences_3_for2_no_other_uses(ptr %ptr) ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: @@ -372,10 +372,10 @@ define i16 @test_chained_first_order_recurrences_3_for1_for2_no_other_uses(ptr % ; CHECK-NEXT: br i1 [[TMP10]], label %middle.block, label %vector.body, !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i16> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i16> [[TMP4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI8:%.*]] = extractelement <4 x i16> [[TMP5]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT7:%.*]] = extractelement <4 x i16> [[TMP5]], i32 3 ; entry: @@ -421,8 +421,8 @@ define double @test_chained_first_order_recurrence_sink_users_1(ptr %ptr) { ; CHECK-NEXT: br i1 [[TMP9]], label %middle.block, label %vector.body, !llvm.loop [[LOOP10:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[WIDE_LOAD]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; entry: @@ -661,10 +661,10 @@ define double @test_resinking_required(ptr %p, ptr noalias %a, ptr noalias %b) { ; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2 -; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI6:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT5:%.*]] = extractelement <4 x double> [[BROADCAST_SPLAT4]], i32 3 +; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI10:%.*]] = extractelement <4 x double> [[TMP4]], i32 2 ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT9:%.*]] = extractelement <4 x double> [[TMP4]], i32 3 ; CHECK-NEXT: br i1 true, label %End, label %scalar.ph ; diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll index 149e4705885b12..6b1d38142b9ec1 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-complex.ll @@ -854,8 +854,8 @@ define void @sink_dominance(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -937,8 +937,8 @@ define void @sink_dominance_2(ptr %ptr, i32 %N) { ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i64> [[TMP5]], i32 3 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[UMAX1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll index 116d0f65c235fa..30a98db665cb3c 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-sink-replicate-region.ll @@ -73,7 +73,10 @@ define void @sink_replicate_region_1(i32 %x, ptr %ptr, ptr noalias %dst) optsize ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: @@ -145,7 +148,10 @@ define void @sink_replicate_region_2(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: @@ -216,9 +222,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%and.red>, vp<[[SEL]]> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i32 %res = vp<[[RED_RES]]> +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: @@ -314,7 +322,10 @@ define void @sink_replicate_region_4_requires_split_at_end_of_block(i32 %x, ptr ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%conv>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %0 = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: @@ -397,7 +408,10 @@ define void @sink_replicate_region_after_replicate_region(ptr %ptr, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%recur.next>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: @@ -469,7 +483,10 @@ define void @need_new_block_after_sinking_pr56146(i32 %x, ptr %src, ptr noalias ; CHECK-NEXT: Successor(s): middle.block ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%l>, ir<1> ; CHECK-NEXT: No successors +; CHECK-EMPTY: +; CHECK-NEXT: Live-out i32 %.pn = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll index a588d71a2c76ec..6d3654d3b97e4b 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll @@ -55,8 +55,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -165,8 +165,8 @@ define void @recurrence_1(ptr readonly noalias %a, ptr noalias %b, i32 %n) { ; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -267,8 +267,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[RDX_MINMAX:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP17]], <4 x i32> [[TMP18]]) ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX]]) -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -400,8 +400,8 @@ define i32 @recurrence_2(ptr nocapture readonly %a, i32 %n) { ; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP9]]) -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -530,8 +530,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; UNROLL-NO-IC-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -674,8 +674,8 @@ define void @recurrence_3(ptr readonly noalias %a, ptr noalias %b, i32 %n, float ; SINK-AFTER-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -919,8 +919,8 @@ define i32 @PR27246() { ; UNROLL-NO-IC-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 2 -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1013,8 +1013,8 @@ define i32 @PR27246() { ; SINK-AFTER-NEXT: br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 2 -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[VEC_IND]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[I_016]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP3]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[E_015]], [[FOR_COND1_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1115,11 +1115,11 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP35:%.*]] = load i32, ptr [[TMP23]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP24]], align 4 ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = load i32, ptr [[TMP25]], align 4 -; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = load i32, ptr [[TMP26]], align 4 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load i32, ptr [[TMP26]], align 4 ; UNROLL-NO-IC-NEXT: [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP35]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP41]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 @@ -1127,7 +1127,6 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; UNROLL-NO-IC-NEXT: br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1223,18 +1222,17 @@ define i32 @PR30183(i32 %pre_load, ptr %a, ptr %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP15:%.*]] = load i32, ptr [[TMP11]], align 4 ; SINK-AFTER-NEXT: [[TMP16:%.*]] = load i32, ptr [[TMP12]], align 4 ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = load i32, ptr [[TMP13]], align 4 -; SINK-AFTER-NEXT: [[TMP18:%.*]] = load i32, ptr [[TMP14]], align 4 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load i32, ptr [[TMP14]], align 4 ; SINK-AFTER-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0 ; SINK-AFTER-NEXT: [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1 ; SINK-AFTER-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[VECTOR_RECUR_EXTRACT_FOR_PHI]], i32 2 -; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3 +; SINK-AFTER-NEXT: [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[VECTOR_RECUR_EXTRACT]], i32 3 ; SINK-AFTER-NEXT: [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1574,11 +1572,11 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC-NEXT: [[TMP24:%.*]] = load double, ptr [[TMP12]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP25:%.*]] = load double, ptr [[TMP13]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP26:%.*]] = load double, ptr [[TMP14]], align 8 -; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = load double, ptr [[TMP15]], align 8 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load double, ptr [[TMP15]], align 8 ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = insertelement <4 x double> poison, double [[TMP24]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP31]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP31]] = insertelement <4 x double> [[TMP30]], double [[VECTOR_RECUR_EXTRACT]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP23]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = shufflevector <4 x double> [[TMP23]], <4 x double> [[TMP31]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = fmul <4 x double> [[TMP32]], [[TMP23]] @@ -1595,7 +1593,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP41]], [[TMP40]] ; UNROLL-NO-IC-NEXT: [[TMP43:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]]) -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP31]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1710,11 +1707,11 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: [[TMP8:%.*]] = load double, ptr [[TMP4]], align 8 ; SINK-AFTER-NEXT: [[TMP9:%.*]] = load double, ptr [[TMP5]], align 8 ; SINK-AFTER-NEXT: [[TMP10:%.*]] = load double, ptr [[TMP6]], align 8 -; SINK-AFTER-NEXT: [[TMP11:%.*]] = load double, ptr [[TMP7]], align 8 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load double, ptr [[TMP7]], align 8 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = insertelement <4 x double> poison, double [[TMP8]], i32 0 ; SINK-AFTER-NEXT: [[TMP13:%.*]] = insertelement <4 x double> [[TMP12]], double [[TMP9]], i32 1 ; SINK-AFTER-NEXT: [[TMP14:%.*]] = insertelement <4 x double> [[TMP13]], double [[TMP10]], i32 2 -; SINK-AFTER-NEXT: [[TMP15]] = insertelement <4 x double> [[TMP14]], double [[TMP11]], i32 3 +; SINK-AFTER-NEXT: [[TMP15]] = insertelement <4 x double> [[TMP14]], double [[VECTOR_RECUR_EXTRACT]], i32 3 ; SINK-AFTER-NEXT: [[TMP16:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP15]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP17:%.*]] = fmul <4 x double> [[TMP16]], [[TMP15]] ; SINK-AFTER-NEXT: [[TMP18:%.*]] = fcmp une <4 x double> [[TMP17]], zeroinitializer @@ -1725,7 +1722,6 @@ define i32 @PR33613(ptr %b, double %j, i32 %d) { ; SINK-AFTER-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[TMP22:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP20]]) -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP15]], i32 3 ; SINK-AFTER-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1826,8 +1822,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -1934,8 +1930,8 @@ define void @sink_after(ptr noalias %a, ptr noalias %b, i64 %n) { ; SINK-AFTER-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2042,11 +2038,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: [[TMP28:%.*]] = load i16, ptr [[TMP14]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = load i16, ptr [[TMP15]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP30:%.*]] = load i16, ptr [[TMP16]], align 2 -; UNROLL-NO-IC-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP17]], align 2 +; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load i16, ptr [[TMP17]], align 2 ; UNROLL-NO-IC-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i32 0 ; UNROLL-NO-IC-NEXT: [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i32 1 ; UNROLL-NO-IC-NEXT: [[TMP34:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP30]], i32 2 -; UNROLL-NO-IC-NEXT: [[TMP35]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 3 +; UNROLL-NO-IC-NEXT: [[TMP35]] = insertelement <4 x i16> [[TMP34]], i16 [[VECTOR_RECUR_EXTRACT]], i32 3 ; UNROLL-NO-IC-NEXT: [[TMP36:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP27]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP37:%.*]] = shufflevector <4 x i16> [[TMP27]], <4 x i16> [[TMP35]], <4 x i32> ; UNROLL-NO-IC-NEXT: [[TMP38:%.*]] = sext <4 x i16> [[TMP36]] to <4 x i32> @@ -2066,7 +2062,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; UNROLL-NO-IC-NEXT: br i1 [[TMP48]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; UNROLL-NO-IC: middle.block: ; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP35]], i32 3 ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2177,11 +2172,11 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: [[TMP10:%.*]] = load i16, ptr [[TMP5]], align 2 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = load i16, ptr [[TMP6]], align 2 ; SINK-AFTER-NEXT: [[TMP12:%.*]] = load i16, ptr [[TMP7]], align 2 -; SINK-AFTER-NEXT: [[TMP13:%.*]] = load i16, ptr [[TMP8]], align 2 +; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = load i16, ptr [[TMP8]], align 2 ; SINK-AFTER-NEXT: [[TMP14:%.*]] = insertelement <4 x i16> poison, i16 [[TMP10]], i32 0 ; SINK-AFTER-NEXT: [[TMP15:%.*]] = insertelement <4 x i16> [[TMP14]], i16 [[TMP11]], i32 1 ; SINK-AFTER-NEXT: [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 2 -; SINK-AFTER-NEXT: [[TMP17]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 3 +; SINK-AFTER-NEXT: [[TMP17]] = insertelement <4 x i16> [[TMP16]], i16 [[VECTOR_RECUR_EXTRACT]], i32 3 ; SINK-AFTER-NEXT: [[TMP18:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP17]], <4 x i32> ; SINK-AFTER-NEXT: [[TMP19:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32> ; SINK-AFTER-NEXT: [[TMP20:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32> @@ -2194,7 +2189,6 @@ define void @PR34711(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %n) { ; SINK-AFTER-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]] ; SINK-AFTER: middle.block: ; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] -; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP17]], i32 3 ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2294,8 +2288,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; UNROLL-NO-IC-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -2407,8 +2401,8 @@ define void @sink_after_with_multiple_users(ptr noalias %a, ptr noalias %b, i64 ; SINK-AFTER-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] ; SINK-AFTER: middle.block: -; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3 +; SINK-AFTER-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; SINK-AFTER-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] ; SINK-AFTER: scalar.ph: ; SINK-AFTER-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index 50a5cc6774c5c6..df61798853ef60 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -6256,8 +6256,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6327,8 +6327,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; IND-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; IND-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; IND: middle.block: -; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; IND-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1 +; IND-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; IND-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; IND: scalar.ph: ; IND-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6402,8 +6402,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; UNROLL: middle.block: -; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1 +; UNROLL-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; UNROLL-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL: scalar.ph: ; UNROLL-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6485,8 +6485,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; UNROLL-NO-IC-NEXT: [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; UNROLL-NO-IC: middle.block: -; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1 +; UNROLL-NO-IC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; UNROLL-NO-IC-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; UNROLL-NO-IC: scalar.ph: ; UNROLL-NO-IC-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] @@ -6560,8 +6560,8 @@ define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n ; INTERLEAVE-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; INTERLEAVE-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]] ; INTERLEAVE: middle.block: -; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; INTERLEAVE-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3 +; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] ; INTERLEAVE-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; INTERLEAVE: scalar.ph: ; INTERLEAVE-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll index 078d6ca35ba113..bcc8632da4c641 100644 --- a/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll +++ b/llvm/test/Transforms/LoopVectorize/interleave-and-scalarize-only.ll @@ -202,7 +202,10 @@ exit: ; DBG-NEXT: Successor(s): middle.block ; DBG-EMPTY: ; DBG-NEXT: middle.block: +; DBG-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end vp<[[SCALAR_STEPS]]>, ir<1> ; DBG-NEXT: No successors +; DBG-EMPTY: +; DBG-NEXT: Live-out i32 %for = vp<[[RESUME_1]]> ; DBG-NEXT: } define void @first_order_recurrence_using_induction(i32 %n, ptr %dst) { diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll index 4c3377255b21a7..0f3e7e6ac40177 100644 --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll @@ -1511,8 +1511,8 @@ define void @PR34743(ptr %a, ptr %b, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i16> [[WIDE_VEC]], i64 7 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ] diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll index 4a71e18ea37785..4c93cb870fadb8 100644 --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -912,9 +912,11 @@ define i16 @print_first_order_recurrence_and_result(ptr %ptr) { ; CHECK-EMPTY: ; CHECK-NEXT: middle.block: ; CHECK-NEXT: EMIT vp<[[FOR_RESULT:%.+]]> = extract-from-end ir<%for.1.next>, ir<2> +; CHECK-NEXT: EMIT vp<[[RESUME_1:%.+]]> = extract-from-end ir<%for.1.next>, ir<1> ; CHECK-NEXT: No successors ; CHECK-EMPTY: ; CHECK-NEXT: Live-out i16 %for.1.lcssa = vp<[[FOR_RESULT]]> +; CHECK-NEXT: Live-out i16 %for.1 = vp<[[RESUME_1]]> ; CHECK-NEXT: } ; entry: