[VPlan] Add new VPInstruction opcode for header mask.
This patch adds a new VPInstruction::HeaderMask opcode to model the
abstract header mask used for tail folding. It will be lowered depending
on target preference: either to an active-lane-mask, an
explicit-vector-length, or a wide compare of the canonical IV and the
backedge-taken count.
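
For illustration, a hand-written LLVM IR sketch of the three concrete forms the abstract mask can be lowered to (VF=4; the names %wide.iv, %iv, %tc, %btc, and %remaining are assumed for the sketch, not taken from the patch):

```llvm
; 1) Wide compare of a widened canonical IV against the backedge-taken count:
;    mask[i] = (iv + i) <=u btc
%btc.ins   = insertelement <4 x i64> poison, i64 %btc, i64 0
%btc.splat = shufflevector <4 x i64> %btc.ins, <4 x i64> poison, <4 x i32> zeroinitializer
%mask.cmp  = icmp ule <4 x i64> %wide.iv, %btc.splat

; 2) Active lane mask: mask[i] = (iv + i) <u tc, computed by an intrinsic.
%mask.alm = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %iv, i64 %tc)

; 3) Explicit vector length: instead of a mask vector, compute how many
;    lanes are live this iteration and pass that to VP intrinsics.
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %remaining, i32 4, i1 false)
```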

Similar to llvm#82270, it would
be good to clarify/agree on the terminology w.r.t. recipes/opcodes
that cannot be code-gen'd directly (i.e. require further gradual lowering).

NOTE: some tests fail or needed updating, because widened IVs are
replaced by scalar steps when their only use was the earlier wide
compare. This could be fixed by adding a suitable wide canonical IV as
an operand to the header-mask recipe, exactly preserving the original
behavior. Alternatively, we could keep the current behavior of the patch
and update the tests, or introduce a wide induction PHI instead of
VPWidenCanonicalIVRecipe; currently we *only* use a wide IV in place of
VPWidenCanonicalIVRecipe if there was a suitable IV in the original
loop, *even* if the mask compare is the *only* wide use. Either never or
always using a wide PHI would be more consistent (or we could eventually
make a more informed, cost-based decision).
fhahn committed Apr 22, 2024
1 parent c93f029 commit 2630805
Showing 9 changed files with 277 additions and 300 deletions.
16 changes: 5 additions & 11 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -8035,21 +8035,14 @@ void VPRecipeBuilder::createHeaderMask() {
return;
}

// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.

// Introduce an abstract header-mask VPInstruction. This will be lowered later
// depending on target preference.
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
HeaderVPBB->insert(IV, NewInsertionPoint);

VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
VPValue *BlockMask = nullptr;
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
VPValue *BlockMask =
Builder.createNaryOp(VPInstruction::HeaderMask, {Plan.getCanonicalIV()});
BlockMaskCache[Header] = BlockMask;
}

@@ -8555,6 +8548,7 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
// TODO: try to put it close to addActiveLaneMask().
if (CM.foldTailWithEVL())
VPlanTransforms::addExplicitVectorLength(*Plan);
VPlanTransforms::lowerRecipes(*Plan);
assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
3 changes: 3 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1180,6 +1180,9 @@ class VPInstruction : public VPRecipeWithIRFlags {
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
PtrAdd,
// An abstract representation of the vector loop's header mask, to be lowered
// later depending on target preference.
HeaderMask,
};

private:
1 change: 1 addition & 0 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -132,6 +132,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::PtrAdd:
case VPInstruction::HeaderMask:
return false;
default:
return true;
194 changes: 72 additions & 122 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -434,44 +434,6 @@ static void removeRedundantInductionCasts(VPlan &Plan) {
}
}

/// Try to replace VPWidenCanonicalIVRecipes with a widened canonical IV
/// recipe, if it exists.
static void removeRedundantCanonicalIVs(VPlan &Plan) {
VPCanonicalIVPHIRecipe *CanonicalIV = Plan.getCanonicalIV();
VPWidenCanonicalIVRecipe *WidenNewIV = nullptr;
for (VPUser *U : CanonicalIV->users()) {
WidenNewIV = dyn_cast<VPWidenCanonicalIVRecipe>(U);
if (WidenNewIV)
break;
}

if (!WidenNewIV)
return;

VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);

if (!WidenOriginalIV || !WidenOriginalIV->isCanonical() ||
WidenOriginalIV->getScalarType() != WidenNewIV->getScalarType())
continue;

// Replace WidenNewIV with WidenOriginalIV if WidenOriginalIV provides
// everything WidenNewIV's users need. That is, WidenOriginalIV will
// generate a vector phi or all users of WidenNewIV demand the first lane
// only.
if (any_of(WidenOriginalIV->users(),
[WidenOriginalIV](VPUser *U) {
return !U->usesScalars(WidenOriginalIV);
}) ||
vputils::onlyFirstLaneUsed(WidenNewIV)) {
WidenNewIV->replaceAllUsesWith(WidenOriginalIV);
WidenNewIV->eraseFromParent();
return;
}
}
}

/// Returns true if \p R is dead and can be removed.
static bool isDeadRecipe(VPRecipeBase &R) {
using namespace llvm::PatternMatch;
@@ -1086,7 +1048,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
}

void VPlanTransforms::optimize(VPlan &Plan, ScalarEvolution &SE) {
removeRedundantCanonicalIVs(Plan);
removeRedundantInductionCasts(Plan);

simplifyRecipes(Plan, SE.getContext());
@@ -1203,52 +1164,32 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
return LaneMaskPhi;
}

/// Collect all VPValues representing a header mask through the (ICMP_ULE,
/// WideCanonicalIV, backedge-taken-count) pattern.
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
SmallVector<VPValue *> WideCanonicalIVs;
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
assert(count_if(Plan.getCanonicalIV()->users(),
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); }) <=
1 &&
"Must have at most one VPWideCanonicalIVRecipe");
if (FoundWidenCanonicalIVUser != Plan.getCanonicalIV()->users().end()) {
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
WideCanonicalIVs.push_back(WideCanonicalIV);
}

// Also include VPWidenIntOrFpInductionRecipes that represent a widened
// version of the canonical induction.
/// Return the header mask recipe of the VPlan, if there is one.
static VPInstruction *getHeaderMask(VPlan &Plan) {
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &Phi : HeaderVPBB->phis()) {
auto *WidenOriginalIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi);
if (WidenOriginalIV && WidenOriginalIV->isCanonical())
WideCanonicalIVs.push_back(WidenOriginalIV);
}
auto R = find_if(*HeaderVPBB, [](VPRecipeBase &R) {
using namespace llvm::VPlanPatternMatch;
return match(&R, m_VPInstruction<VPInstruction::HeaderMask>(m_VPValue()));
});
return R == HeaderVPBB->end() ? nullptr : cast<VPInstruction>(&*R);
}

// Walk users of wide canonical IVs and collect to all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
SmallVector<VPValue *> HeaderMasks;
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
auto *HeaderMask = dyn_cast<VPInstruction>(U);
if (!HeaderMask || HeaderMask->getOpcode() != Instruction::ICmp ||
HeaderMask->getPredicate() != CmpInst::ICMP_ULE ||
HeaderMask->getOperand(1) != BTC)
continue;
static VPValue *getOrCreateWideCanonicalIV(VPlan &Plan,
VPRecipeBase *InsertPt) {

assert(HeaderMask->getOperand(0) == Wide &&
"WidenCanonicalIV must be the first operand of the compare");
HeaderMasks.push_back(HeaderMask);
}
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
for (VPRecipeBase &R : HeaderVPBB->phis()) {
auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R);
if (!WideIV || !WideIV->isCanonical() ||
Plan.getCanonicalIV()->getScalarType() != WideIV->getScalarType())
continue;
return WideIV;
break;
}
return HeaderMasks;

auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
IV->insertBefore(InsertPt);
return IV;
}

void VPlanTransforms::addActiveLaneMask(
@@ -1258,30 +1199,23 @@ void VPlanTransforms::addActiveLaneMask(
UseActiveLaneMaskForControlFlow) &&
"DataAndControlFlowWithoutRuntimeCheck implies "
"UseActiveLaneMaskForControlFlow");

auto FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
[](VPUser *U) { return isa<VPWidenCanonicalIVRecipe>(U); });
assert(FoundWidenCanonicalIVUser &&
"Must have widened canonical IV when tail folding!");
auto *WideCanonicalIV =
cast<VPWidenCanonicalIVRecipe>(*FoundWidenCanonicalIVUser);
VPValue *HeaderMask = getHeaderMask(Plan);
assert(HeaderMask && "Active-lane-mask not needed?");
VPSingleDefRecipe *LaneMask;
if (UseActiveLaneMaskForControlFlow) {
LaneMask = addVPLaneMaskPhiAndUpdateExitBranch(
Plan, DataAndControlFlowWithoutRuntimeCheck);
} else {
VPBuilder B = VPBuilder::getToInsertAfter(WideCanonicalIV);
LaneMask = B.createNaryOp(VPInstruction::ActiveLaneMask,
{WideCanonicalIV, Plan.getTripCount()}, nullptr,
"active.lane.mask");
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
VPBuilder B;
B.setInsertPoint(HeaderVPBB, HeaderVPBB->getFirstNonPhi());
LaneMask = B.createNaryOp(
VPInstruction::ActiveLaneMask,
{getOrCreateWideCanonicalIV(Plan, &*HeaderVPBB->getFirstNonPhi()),
Plan.getTripCount()},
nullptr, "active.lane.mask");
}

// Walk users of WideCanonicalIV and replace all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an
// active-lane-mask.
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan))
HeaderMask->replaceAllUsesWith(LaneMask);
HeaderMask->replaceAllUsesWith(LaneMask);
}
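
For reference, a hand-written IR sketch of a tail-folded vector body once the abstract header mask has been replaced by an active lane mask (VF=4; the stored value and alignment follow the test below, the remaining names are assumed):

```llvm
vector.body:
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  ; mask[i] = (index + i) <u tc, replacing the abstract header mask
  %mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 %index, i64 %tc)
  %gep = getelementptr inbounds i32, ptr %A, i64 %index
  call void @llvm.masked.store.v4i32.p0(<4 x i32> <i32 13, i32 13, i32 13, i32 13>, ptr %gep, i32 1, <4 x i1> %mask)
  %index.next = add i64 %index, 4
  %done = icmp eq i64 %index.next, %n.vec
  br i1 %done, label %middle.block, label %vector.body
```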

/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
@@ -1307,6 +1241,10 @@ void VPlanTransforms::addActiveLaneMask(
/// ...
///
void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
VPValue *HeaderMask = getHeaderMask(Plan);
if (!HeaderMask)
return;

VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto *CanonicalIVPHI = Plan.getCanonicalIV();
VPValue *StartV = CanonicalIVPHI->getStartValue();
@@ -1336,31 +1274,30 @@ void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) {
NextEVLIV->insertBefore(CanonicalIVIncrement);
EVLPhi->addOperand(NextEVLIV);

for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
if (!MemR)
continue;
assert(!MemR->isReverse() &&
"Reversed memory operations not supported yet.");
VPValue *OrigMask = MemR->getMask();
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
N->insertBefore(L);
L->replaceAllUsesWith(N);
L->eraseFromParent();
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
N->insertBefore(S);
S->eraseFromParent();
} else {
llvm_unreachable("unsupported recipe");
}
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
if (!MemR)
continue;
assert(!MemR->isReverse() &&
"Reversed memory operations not supported yet.");
VPValue *OrigMask = MemR->getMask();
assert(OrigMask && "Unmasked widen memory recipe when folding tail");
VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
N->insertBefore(L);
L->replaceAllUsesWith(N);
L->eraseFromParent();
} else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
N->insertBefore(S);
S->eraseFromParent();
} else {
llvm_unreachable("unsupported recipe");
}
recursivelyDeleteDeadRecipes(HeaderMask);
}
recursivelyDeleteDeadRecipes(HeaderMask);

// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
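
A hand-written sketch of the resulting memory operations (all names assumed): the EVL-based recipes emit VP intrinsics that take the explicit vector length, and when the only mask on the memory op was the header mask itself (NewMask == nullptr above), the VP mask degenerates to all-true and the EVL alone bounds the active lanes.

```llvm
%remaining = sub i64 %tc, %index
%evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %remaining, i32 4, i1 false)
%v = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %src, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %evl)
call void @llvm.vp.store.v4i32.p0(<4 x i32> %v, ptr %dst, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 %evl)
```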
@@ -1465,3 +1402,16 @@ void VPlanTransforms::dropPoisonGeneratingRecipes(
}
}
}

void VPlanTransforms::lowerRecipes(VPlan &Plan) {
VPInstruction *HeaderMask = getHeaderMask(Plan);
if (!HeaderMask)
return;

VPValue *IV = getOrCreateWideCanonicalIV(Plan, HeaderMask);
VPBuilder Builder(HeaderMask);
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
VPValue *M = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
HeaderMask->replaceAllUsesWith(M);
HeaderMask->eraseFromParent();
}
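
This default lowering corresponds to IR like the following sketch, mirroring the updated CHECK lines in the tests below (VF=4, i32 IV; names assumed):

```llvm
; The header mask becomes: icmp ule (wide canonical IV), (splat of BTC).
%btc.ins   = insertelement <4 x i32> poison, i32 %trip.count.minus.1, i64 0
%btc.splat = shufflevector <4 x i32> %btc.ins, <4 x i32> poison, <4 x i32> zeroinitializer
; Wide canonical IV built from the scalar index, as VPWidenCanonicalIVRecipe does.
%iv.ins    = insertelement <4 x i32> poison, i32 %index, i64 0
%iv.splat  = shufflevector <4 x i32> %iv.ins, <4 x i32> poison, <4 x i32> zeroinitializer
%vec.iv    = add <4 x i32> %iv.splat, <i32 0, i32 1, i32 2, i32 3>
%mask      = icmp ule <4 x i32> %vec.iv, %btc.splat
```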
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -105,6 +105,10 @@ struct VPlanTransforms {
/// VPCanonicalIVPHIRecipe is only used to control the loop after
/// this transformation.
static void addExplicitVectorLength(VPlan &Plan);

/// Lower abstract VPInstruction recipes to a concrete sequence of recipes for
/// which code can be generated.
static void lowerRecipes(VPlan &Plan);
};

} // namespace llvm
@@ -37,7 +37,7 @@ define dso_local void @alignTC(ptr noalias nocapture %A, i32 %n) optsize {
; CHECK-NEXT: store i32 13, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[RIVPLUS1]] = add nuw nsw i32 [[RIV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[RIVPLUS1]], [[ALIGNEDTC]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -158,13 +158,15 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT2]]
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
@@ -174,31 +176,30 @@ define dso_local void @cannotProveAlignedTC(ptr noalias nocapture %A, i32 %p, i3
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 1
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
; CHECK: pred.store.if1:
; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[INDEX]], 1
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP5]]
; CHECK-NEXT: store i32 13, ptr [[TMP6]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
; CHECK: pred.store.continue2:
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP0]], i32 2
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
; CHECK: pred.store.if3:
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 2
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP8]]
; CHECK-NEXT: store i32 13, ptr [[TMP9]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
; CHECK: pred.store.continue4:
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.if5:
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.if7:
; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[INDEX]], 3
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[TMP11]]
; CHECK-NEXT: store i32 13, ptr [[TMP12]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block: