Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VPlan] Add new VPInstruction opcode for header mask. #89603

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 9 additions & 12 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8040,21 +8040,14 @@ void VPRecipeBuilder::createHeaderMask() {
return;
}

// Introduce the early-exit compare IV <= BTC to form header block mask.
// This is used instead of IV < TC because TC may wrap, unlike BTC. Start by
// constructing the desired canonical IV in the header block as its first
// non-phi instructions.

// Introduce an abstract header-mask VPInstruction. This will be lowered later
// depending on target preference.
VPBasicBlock *HeaderVPBB = Plan.getVectorLoopRegion()->getEntryBasicBlock();
auto NewInsertionPoint = HeaderVPBB->getFirstNonPhi();
auto *IV = new VPWidenCanonicalIVRecipe(Plan.getCanonicalIV());
HeaderVPBB->insert(IV, NewInsertionPoint);

VPBuilder::InsertPointGuard Guard(Builder);
Builder.setInsertPoint(HeaderVPBB, NewInsertionPoint);
VPValue *BlockMask = nullptr;
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
BlockMask = Builder.createICmp(CmpInst::ICMP_ULE, IV, BTC);
VPValue *BlockMask =
Builder.createNaryOp(VPInstruction::HeaderMask, {Plan.getCanonicalIV()});
BlockMaskCache[Header] = BlockMask;
}

Expand Down Expand Up @@ -8558,9 +8551,13 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF,
VPlanTransforms::truncateToMinimalBitwidths(
*Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext());
VPlanTransforms::optimize(*Plan, *PSE.getSE());
// TODO: try to put it close to addActiveLaneMask().
// TODO: The three passes that lower the header mask (addActiveLaneMask,
// addExplicitVectorLength, lowerRecipes) should arguably be applied
// together, depending on tail folding style, inside
// VPlanTransforms::optimize().
if (CM.foldTailWithEVL())
VPlanTransforms::addExplicitVectorLength(*Plan);
VPlanTransforms::lowerRecipes(*Plan);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(TODO) The three passes that lower the header mask (addActiveLaneMask, addExplicitVectorLength, lowerRecipes) should arguably be applied together, depending on tail folding style, inside VPlanTransforms::optimize().

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated the TODO above, thanks!

assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid");
VPlans.push_back(std::move(Plan));
}
Expand Down
22 changes: 12 additions & 10 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -1176,6 +1176,12 @@ class VPInstruction : public VPRecipeWithIRFlags {
BranchOnCount,
BranchOnCond,
ComputeReductionResult,
// An abstract representation of the vector loop's header mask, to be lowered
// later depending on target preference. Relevant only when the header may
// have a partial mask, i.e., when tail folding. A mask known to always be
// full is represented by null, w/o a HeaderMask recipe. A header mask may
// not be empty.
HeaderMask,
// Add an offset in bytes (second operand) to a base pointer (first
// operand). Only generates scalar values (either for the first lane only or
// for all lanes, depending on its uses).
Expand Down Expand Up @@ -2688,14 +2694,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
/// A Recipe for widening the canonical induction variable of the vector loop.
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
public:
VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
VPWidenCanonicalIVRecipe(VPValue *Start)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's still a [Scalar]CanonicalIV, right? Start may be confused with the invariant value feeding an IV from the preheader.

: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {Start}) {}

~VPWidenCanonicalIVRecipe() override = default;

VPWidenCanonicalIVRecipe *clone() override {
return new VPWidenCanonicalIVRecipe(
cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
return new VPWidenCanonicalIVRecipe(getOperand(0));
}

VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
Expand All @@ -2710,12 +2715,6 @@ class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe {
void print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const override;
#endif

/// Returns the scalar type of the induction.
const Type *getScalarType() const {
return cast<VPCanonicalIVPHIRecipe>(getOperand(0)->getDefiningRecipe())
->getScalarType();
}
};

/// A recipe for converting the input value \p IV value to the corresponding
Expand Down Expand Up @@ -3055,6 +3054,9 @@ class VPRegionBlock : public VPBlockBase {
/// Clone all blocks in the single-entry single-exit region of the block and
/// their recipes without updating the operands of the cloned recipes.
VPRegionBlock *clone() override;

/// Return the header mask recipe of the VPlan, if there is one.
VPInstruction *getHeaderMask(VPlan &Plan) const;
};

/// VPlan models a candidate for vectorization, encoding various decisions take
Expand Down
17 changes: 10 additions & 7 deletions llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPInstruction::Not:
case VPInstruction::CalculateTripCountMinusVF:
case VPInstruction::CanonicalIVIncrementForPart:
case VPInstruction::HeaderMask:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: would be good to list in lex order, so better placed before PtrAdd.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks!

case VPInstruction::PtrAdd:
return false;
default:
Expand Down Expand Up @@ -690,6 +691,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ComputeReductionResult:
O << "compute-reduction-result";
break;
case VPInstruction::HeaderMask:
O << "header-mask";
break;
case VPInstruction::PtrAdd:
O << "ptradd";
break;
Expand Down Expand Up @@ -1897,22 +1901,21 @@ void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
#endif

void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
Value *CanonicalIV = State.get(getOperand(0), 0, /*IsScalar*/ true);
Type *STy = CanonicalIV->getType();
Value *Start = State.get(getOperand(0), 0, /*IsScalar*/ true);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Scalar]CanonicalIV? Admittedly used to build VStart below.

Type *STy = Start->getType();
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
ElementCount VF = State.VF;
Value *VStart = VF.isScalar()
? CanonicalIV
: Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast");
Value *VStart =
VF.isScalar() ? Start : Builder.CreateVectorSplat(VF, Start, "broadcast");
for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
Value *VStep = createStepForVF(Builder, STy, VF, Part);
if (VF.isVector()) {
VStep = Builder.CreateVectorSplat(VF, VStep);
VStep =
Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType()));
}
Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
State.set(this, CanonicalVectorIV, Part);
Value *Res = Builder.CreateAdd(VStart, VStep, "vec.iv");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Retain name - this still generates a canonical wide IV?

State.set(this, Res, Part);
}
}

Expand Down
Loading
Loading