Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to loop hoisting #71504

Merged
merged 2 commits into from
Jul 1, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 52 additions & 65 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3346,7 +3346,15 @@ BasicBlock* Compiler::optLoopEntry(BasicBlock* preHeader)
{
assert((preHeader->bbFlags & BBF_LOOP_PREHEADER) != 0);

return (preHeader->bbJumpDest == nullptr) ? preHeader->bbNext : preHeader->bbJumpDest;
if (preHeader->KindIs(BBJ_NONE))
{
return preHeader->bbNext;
}
else
{
assert(preHeader->KindIs(BBJ_ALWAYS));
return preHeader->bbJumpDest;
}
}

//-----------------------------------------------------------------------------
Expand Down Expand Up @@ -6461,43 +6469,34 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt)
m_loopsConsidered++;
#endif // LOOP_HOIST_STATS

BasicBlockList* preHeadersOfChildLoops = nullptr;
BasicBlockList* firstPreHeader = nullptr;
BasicBlockList* firstPreHeader = nullptr;
BasicBlockList** preHeaderAppendPtr = &firstPreHeader;

if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP)
{
BitVecTraits m_visitedTraits(fgBBNumMax * 2, this);
BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits));

for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP;
child = optLoopTable[child].lpSibling)
{
optHoistLoopNest(child, hoistCtxt);

if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD)
{
// If any preheaders were found, add them to the tracking list
// If a pre-header is found, add it to the tracking list. Most likely, the recursive call
// to hoist from the `child` loop nest found something to hoist and created a pre-header
// where the hoisted code was placed.

BasicBlock* preHeaderBlock = optLoopTable[child].lpHead;
if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum))
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum);
JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum);
JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum);

// Here, we are arranging the blocks in reverse execution order, so when they are pushed
// on the stack that hoist these blocks further sees them in execution order.
if (firstPreHeader == nullptr)
{
preHeadersOfChildLoops = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr);
firstPreHeader = preHeadersOfChildLoops;
}
else
{
preHeadersOfChildLoops->next =
new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr);
preHeadersOfChildLoops = preHeadersOfChildLoops->next;
}
}
// Here, we are arranging the blocks in reverse lexical order, so when they are removed from the
// head of this list and pushed on the stack of hoisting blocks to process, they are in
// forward lexical order when popped. Note that for child loops `i` and `j`, in order `i`
// followed by `j` in the `lpSibling` list, that `j` is actually first in lexical order
// and that makes these blocks arranged in reverse lexical order in this list.
// That is, the sibling list is in reverse lexical order.
// Note: the sibling list order should not matter to any algorithm; this order is not guaranteed.
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
*preHeaderAppendPtr = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr);
preHeaderAppendPtr = &((*preHeaderAppendPtr)->next);
}
}
}
Expand Down Expand Up @@ -6617,11 +6616,10 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi
// or side-effect dependent things.
//
// We really should consider hoisting from conditionally executed blocks, if they are frequently executed
// and it is safe to evaluate the tree early
// and it is safe to evaluate the tree early.
//
ArrayStack<BasicBlock*> defExec(getAllocatorLoopHoist());

bool pushAllPreheaders = false;
BasicBlockList* preHeadersList = existingPreHeaders;

if (pLoopDsc->lpExitCnt == 1)
{
Expand All @@ -6637,49 +6635,39 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi
//
// TODO-CQ: In future, we should create preheaders upfront before building
// dominators so we don't have to do this extra work here.
BasicBlock* cur = pLoopDsc->lpExit;
BasicBlockList* preHeadersList = existingPreHeaders;

BasicBlock* cur = pLoopDsc->lpExit;
while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry)
{
JITDUMP(" -- " FMT_BB " (dominate exit block)\n", cur->bbNum);
defExec.Push(cur);
cur = cur->bbIDom;

if (preHeadersList != nullptr)
for (; preHeadersList != nullptr; preHeadersList = preHeadersList->next)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you find out an example where we didn't add a preheader in this loop?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you find out an example where we didn't add a preheader in this loop?

Do you mean where we added more than one preheader? Zero or one would happen the same as before. (And the answer is yes)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok and the example is #71504 (comment)?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Correct.

{
BasicBlock* preHeaderBlock = preHeadersList->block;
BasicBlock* lpEntry = optLoopEntry(preHeaderBlock);
if (cur->bbNum < lpEntry->bbNum)
{
JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum,
lpEntry->bbNatLoopNum);

defExec.Push(preHeaderBlock);
preHeadersList = preHeadersList->next;
}
else
{
break;
}
}
}

// Push the remaining preheaders, if any. This usually will happen if entry
// and exit blocks of lnum is same.
while (preHeadersList != nullptr)
{
BasicBlock* preHeaderBlock = preHeadersList->block;
JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum,
optLoopEntry(preHeaderBlock)->bbNatLoopNum);
defExec.Push(preHeaderBlock);
preHeadersList = preHeadersList->next;
}

// If we didn't reach the entry block, give up and *just* push the entry block.
// If we didn't reach the entry block, give up and *just* push the entry block (and the pre-headers).
if (cur != pLoopDsc->lpEntry)
{
JITDUMP(" -- odd, we didn't reach entry from exit via dominators. Only considering hoisting in entry "
"block " FMT_BB "\n",
pLoopDsc->lpEntry->bbNum);
defExec.Reset();
pushAllPreheaders = true;
preHeadersList = existingPreHeaders;
}
}
else // More than one exit
Expand All @@ -6689,22 +6677,15 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, Basi

JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n",
pLoopDsc->lpEntry->bbNum, lnum);
pushAllPreheaders = true;
}

if (pushAllPreheaders)
// Push the remaining preheaders, if any.
for (; preHeadersList != nullptr; preHeadersList = preHeadersList->next)
{
// We will still push all the preheaders found.
BasicBlockList* preHeadersList = existingPreHeaders;

while (preHeadersList != nullptr)
{
BasicBlock* preHeaderBlock = preHeadersList->block;
JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum,
optLoopEntry(preHeaderBlock)->bbNatLoopNum);
defExec.Push(preHeaderBlock);
preHeadersList = preHeadersList->next;
}
BasicBlock* preHeaderBlock = preHeadersList->block;
JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum,
optLoopEntry(preHeaderBlock)->bbNatLoopNum);
defExec.Push(preHeaderBlock);
}

JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum);
Expand Down Expand Up @@ -7118,36 +7099,38 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
Value& top = m_valueStack.TopRef();
assert(top.Node() == stmt->GetRootNode());

// hoist the top node?
if (top.m_hoistable)
{
m_compiler->optHoistCandidate(stmt->GetRootNode(), block, m_loopNum, m_hoistContext);
}
else
{
JITDUMP(" [%06u] not %s: %s\n", dspTreeID(top.Node()),
top.m_invariant ? "invariant" : "hoistable", top.m_failReason);
JITDUMP(" [%06u] %s: %s\n", dspTreeID(top.Node()),
top.m_invariant ? "not hoistable" : "not invariant", top.m_failReason);
}

m_valueStack.Reset();
}

// Only unconditionally executed blocks in the loop are visited (see optHoistThisLoop)
// so after we're done visiting the first block we need to assume the worst, that the
// blocks that are not visisted have side effects.
// blocks that are not visited have side effects.
m_beforeSideEffect = false;
}

fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* node = *use;
JITDUMP("----- PreOrderVisit for [%06u] %s\n", dspTreeID(node), GenTree::OpName(node->OperGet()));
m_valueStack.Emplace(node);
return fgWalkResult::WALK_CONTINUE;
}

fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
{
GenTree* tree = *use;
JITDUMP("----- PostOrderVisit for [%06u]\n", dspTreeID(tree));
JITDUMP("----- PostOrderVisit for [%06u] %s\n", dspTreeID(tree), GenTree::OpName(tree->OperGet()));

if (tree->OperIsLocal())
{
Expand Down Expand Up @@ -7197,6 +7180,10 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
}
#endif

JITDUMP(" [%06u] %s: %s: %s\n", dspTreeID(tree), GenTree::OpName(tree->OperGet()),
top.m_invariant ? (top.m_hoistable ? "hoistable" : "not hoistable") : "not invariant",
top.m_failReason);

return fgWalkResult::WALK_CONTINUE;
}

Expand Down Expand Up @@ -7489,8 +7476,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack<BasicBlock*>* blo
// should not hoist further children past it.
hasExcep = (tree->gtFlags & GTF_EXCEPT) != 0;
}
JITDUMP(" [%06u] not %s: %s\n", dspTreeID(value.Node()),
value.m_invariant ? "invariant" : "hoistable", value.m_failReason);
JITDUMP(" [%06u] %s: %s\n", dspTreeID(value.Node()),
value.m_invariant ? "not hoistable" : "not invariant", value.m_failReason);
}
else
{
Expand Down