Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Add loop-aware RPO, and use as LSRA's block sequence #108086

Merged
merged 10 commits into from
Oct 10, 2024
6 changes: 5 additions & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4524,7 +4524,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
compFunctionTraceStart();

// Enable flow graph checks
activePhaseChecks |= PhaseChecks::CHECK_FG;
activePhaseChecks |= PhaseChecks::CHECK_FG | PhaseChecks::CHECK_FG_ANNOTATIONS;

// Prepare for importation
//
Expand Down Expand Up @@ -5239,6 +5239,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
lvaTrackedFixed = true;
const unsigned numBlocksBeforeLSRA = fgBBcount;

// Backend phases will use a loop-aware RPO traversal of the flowgraph,
// so skip checking pre/postorder numbers for correctness.
activePhaseChecks &= ~PhaseChecks::CHECK_FG_ANNOTATIONS;

// Now that lowering is completed we can proceed to perform register allocation
//
auto linearScanPhase = [this] {
Expand Down
20 changes: 11 additions & 9 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -1527,15 +1527,16 @@ extern const char* PhaseEnums[];
// clang-format off
enum class PhaseChecks : unsigned int
{
CHECK_NONE = 0,
CHECK_IR = 1 << 0, // ir flags, etc
CHECK_UNIQUE = 1 << 1, // tree node uniqueness
CHECK_FG = 1 << 2, // flow graph integrity
CHECK_EH = 1 << 3, // eh table integrity
CHECK_LOOPS = 1 << 4, // loop integrity/canonicalization
CHECK_LIKELIHOODS = 1 << 5, // profile data likelihood integrity
CHECK_PROFILE = 1 << 6, // profile data full integrity
CHECK_LINKED_LOCALS = 1 << 7, // check linked list of locals
CHECK_NONE = 0,
CHECK_IR = 1 << 0, // ir flags, etc
CHECK_UNIQUE = 1 << 1, // tree node uniqueness
CHECK_FG = 1 << 2, // flow graph integrity
CHECK_EH = 1 << 3, // eh table integrity
CHECK_LOOPS = 1 << 4, // loop integrity/canonicalization
CHECK_LIKELIHOODS = 1 << 5, // profile data likelihood integrity
CHECK_PROFILE = 1 << 6, // profile data full integrity
CHECK_LINKED_LOCALS = 1 << 7, // check linked list of locals
CHECK_FG_ANNOTATIONS = 1 << 8, // check flowgraph annotation data structures
};

inline constexpr PhaseChecks operator ~(PhaseChecks a)
Expand Down Expand Up @@ -6286,6 +6287,7 @@ class Compiler

template <const bool useProfile = false>
FlowGraphDfsTree* fgComputeDfs();
FlowGraphDfsTree* fgComputeLoopAwareDfs();
void fgInvalidateDfsTree();

void fgRemoveReturnBlock(BasicBlock* block);
Expand Down
74 changes: 74 additions & 0 deletions src/coreclr/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4120,6 +4120,80 @@ FlowGraphDfsTree* Compiler::fgComputeDfs()
template FlowGraphDfsTree* Compiler::fgComputeDfs<false>();
template FlowGraphDfsTree* Compiler::fgComputeDfs<true>();

//------------------------------------------------------------------------
// fgComputeLoopAwareDfs: Compute a depth-first search tree for the flow graph
// where in the RPO traversal, loop bodies are visited before loop successors.
//
// Returns:
// The tree.
//
// Notes:
// If the flow graph has loops, the DFS will be reordered such that loop bodies are compact.
// This will invalidate BasicBlock::bbPreorderNum and BasicBlock::bbPostorderNum.
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we have any dependencies on bbPreorderNum or bbPostorderNum in the backend, but if we want to use loop-aware RPOs elsewhere in the JIT, I can work on making these members consistent.

//
FlowGraphDfsTree* Compiler::fgComputeLoopAwareDfs()
Copy link
Member

@jakobbotsch jakobbotsch Oct 4, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anything gained from trying to represent this as an actual FlowGraphDfsTree? I think it would make more sense to have a utility function that given FlowGraphDfsTree and FlowGraphNaturalLoops visits the blocks in RPO that respects the loop structure. It would basically be a slight generalization of what we have in VN already.

The "compute DFS tree" into "identify loops" into "now create another DFS tree" seems wasteful and conceptually a bit odd.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there anything gained from trying to represent this as an actual FlowGraphDfsTree?

Probably not.

I think it would make more sense to have a utility function that given FlowGraphDfsTree and FlowGraphNaturalLoops visits the blocks in RPO that respects the loop structure.

That sounds sensible -- I'll try modeling this after FlowGraphNaturalLoop::VisitLoopBlocksReversePostOrder.

{
// Step 1: make sure there is a DFS tree to start from. If none is cached in m_dfsTree,
// compute a profile-aware one.
if (m_dfsTree == nullptr)
{
// Computing a profile-aware RPO means the DFS computation won't match the debug check's expectations,
// so make sure these checks have already been disabled.
assert(!hasFlag(activePhaseChecks, PhaseChecks::CHECK_FG_ANNOTATIONS));
m_dfsTree = fgComputeDfs</* useProfile */ true>();
}

// Step 2: without a cycle there is nothing to reorder, so hand back the existing tree unchanged.
if (!m_dfsTree->HasCycle())
{
// No need to search for loops
return m_dfsTree;
}

// Step 3: reuse cached loop information if present, otherwise identify loops from the DFS tree.
if (m_loops == nullptr)
{
m_loops = FlowGraphNaturalLoops::Find(m_dfsTree);
}

// The block-to-loop map is rebuilt unconditionally (any previous m_blockToLoop is overwritten).
m_blockToLoop = BlockToNaturalLoopMap::Build(m_loops);

// NOTE(review): presumably the epoch must be current before a BlockSet keyed on bbNum is created -- confirm.
EnsureBasicBlockEpoch();
BlockSet visitedBlocks(BlockSetOps::MakeEmpty(this));

// The new order is stored as a post-order array. It is allocated with fgBBcount slots, but only
// the first numBlocks (the post-order count of the original tree) are handed to the new tree below.
BasicBlock** loopAwarePostOrder = new (this, CMK_DepthFirstSearch) BasicBlock*[fgBBcount];
const unsigned numBlocks = m_dfsTree->GetPostOrderCount();
// Blocks are discovered in reverse post-order, so the array is filled from the last slot backwards.
// NOTE(review): assuming each block of the original tree is recorded exactly once, the final
// post-decrement wraps this unsigned index; it is not read again after the loop below.
unsigned newIndex = numBlocks - 1;

// Records 'block' in the next free slot (from the back) unless it was already recorded.
// Always returns BasicBlockVisit::Continue, so it never stops a loop-block visit early.
auto visitBlock = [this, loopAwarePostOrder, &visitedBlocks, &newIndex](BasicBlock* block) -> BasicBlockVisit {
// If this block is in a loop, we will try to visit it more than once
// (first when we visit its containing loop, and then later as we iterate
// through the initial RPO).
// Thus, we need to keep track of visited blocks.
if (!BlockSetOps::IsMember(this, visitedBlocks, block->bbNum))
{
loopAwarePostOrder[newIndex--] = block;
BlockSetOps::AddElemD(this, visitedBlocks, block->bbNum);
}

return BasicBlockVisit::Continue;
};

// Step 4: walk the original tree in reverse post-order (highest post-order index first).
for (unsigned i = numBlocks; i != 0; i--)
{
BasicBlock* const block = m_dfsTree->GetPostOrder(i - 1);
FlowGraphNaturalLoop* const loop = m_blockToLoop->GetLoop(block);

// If this block is a loop header, visit the entire loop before moving on
if ((loop != nullptr) && (block == loop->GetHeader()))
{
loop->VisitLoopBlocksReversePostOrder(visitBlock);
}
else
{
// Not a loop header: record the block itself (a no-op if an enclosing loop visit already did).
visitBlock(block);
}
}

// Step 5: wrap the reordered array in a new tree. hasCycle is passed as true because this point
// is only reached when the original tree reported a cycle.
return new (this, CMK_DepthFirstSearch) FlowGraphDfsTree(this, loopAwarePostOrder, numBlocks, /* hasCycle */ true);
}

//------------------------------------------------------------------------
// fgInvalidateDfsTree: Invalidate computed DFS tree and dependent annotations
// (like loops, dominators and SSA).
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,10 +949,11 @@ void LinearScan::setBlockSequence()
#endif // DEBUG

// Initialize the "visited" blocks set.
bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
compiler->m_dfsTree = compiler->fgComputeLoopAwareDfs();

assert((blockSequence == nullptr) && (bbSeqCount == 0));
FlowGraphDfsTree* const dfsTree = compiler->fgComputeDfs</* useProfile */ true>();
FlowGraphDfsTree* const dfsTree = compiler->m_dfsTree;
blockSequence = dfsTree->GetPostOrder();
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
blockInfo = new (compiler, CMK_LSRA) LsraBlockInfo[bbNumMaxBeforeResolution + 1];
Expand Down
5 changes: 4 additions & 1 deletion src/coreclr/jit/phase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,10 @@ void Phase::PostPhase(PhaseStatus status)
comp->fgDebugCheckLinkedLocals();
}

comp->fgDebugCheckFlowGraphAnnotations();
if (hasFlag(checks, PhaseChecks::CHECK_FG_ANNOTATIONS))
{
comp->fgDebugCheckFlowGraphAnnotations();
}
}
#endif // DEBUG
}
Loading