Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve reachability sets computation #84204

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/coreclr/jit/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,8 @@ class BitSetOps

// Destructively modify "bs1" to be the union of "bs1" and "bs2".
static void UnionD(Env env, BitSetType& bs1, BitSetValueArgType bs2);
// Destructively modify "bs1" to be the union of "bs1" and "bs2"; return `true` if `bs1` changed.
static bool UnionDChanged(Env env, BitSetType& bs1, BitSetValueArgType bs2);
// Returns a new BitSet that is the union of "bs1" and "bs2".
static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2);

Expand Down Expand Up @@ -374,6 +376,11 @@ class BitSetOpsWithCounter
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_UnionD);
BSO::UnionD(env, bs1, bs2);
}
static bool UnionDChanged(Env env, BitSetType& bs1, BitSetValueArgType bs2)
{
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_UnionDChanged);
return BSO::UnionDChanged(env, bs1, bs2);
}
static BitSetValueRetType Union(Env env, BitSetValueArgType bs1, BitSetValueArgType bs2)
{
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_Union);
Expand Down
41 changes: 40 additions & 1 deletion src/coreclr/jit/bitsetasshortlong.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
static unsigned CountLong(Env env, BitSetShortLongRep bs);
static bool IsEmptyUnionLong(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2);
static void UnionDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static bool UnionDLongChanged(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
static bool TryAddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
Expand Down Expand Up @@ -209,6 +210,23 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
UnionDLong(env, bs1, bs2);
}
}

static bool UnionDChanged(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2)
{
if (IsShort(env))
{
size_t bsCurrent = (size_t)bs1;
size_t bsNew = bsCurrent | ((size_t)bs2);
bool changed = bsNew != bsCurrent;
bs1 = (BitSetShortLongRep)bsNew;
return changed;
}
else
{
return UnionDLongChanged(env, bs1, bs2);
}
}

static BitSetShortLongRep Union(Env env, BitSetShortLongRep bs1, BitSetShortLongRep bs2)
{
BitSetShortLongRep res = MakeCopy(env, bs1);
Expand Down Expand Up @@ -443,7 +461,7 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
{
if (IsShort(env))
{
// Can't just shift by numBits+1, since that might be 32 (and (1 << 32( == 1, for an unsigned).
// Can't just shift by numBits+1, since that might be 32 (and (1 << 32) == 1, for an unsigned).
unsigned numBits = BitSetTraits::GetSize(env);
if (numBits == BitsInSizeT)
{
Expand Down Expand Up @@ -638,6 +656,27 @@ void BitSetOps</*BitSetType*/ BitSetShortLongRep,
}
}

template <typename Env, typename BitSetTraits>
bool BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
/*BitSetTraits*/ BitSetTraits>::UnionDLongChanged(Env env,
BitSetShortLongRep& bs1,
BitSetShortLongRep bs2)
{
assert(!IsShort(env));
bool changed = false;
unsigned len = BitSetTraits::GetArrSize(env);
for (unsigned i = 0; i < len; i++)
{
size_t bsCurrent = bs1[i];
size_t bsNew = bsCurrent | bs2[i];
changed |= bsNew != bsCurrent;
bs1[i] = bsNew;
}
return changed;
}

template <typename Env, typename BitSetTraits>
void BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/bitsetops.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ BSOPNAME(BSOP_RemoveElem)
BSOPNAME(BSOP_AddElemD)
BSOPNAME(BSOP_AddElem)
BSOPNAME(BSOP_UnionD)
BSOPNAME(BSOP_UnionDChanged)
BSOPNAME(BSOP_Union)
BSOPNAME(BSOP_IntersectionD)
BSOPNAME(BSOP_Intersection)
Expand Down
28 changes: 28 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,15 @@ Histogram bbCntTable(bbCntBuckets);
unsigned bbSizeBuckets[] = {1, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 0};
Histogram bbOneBBSizeTable(bbSizeBuckets);

unsigned domsChangedIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram domsChangedIterationTable(domsChangedIterationBuckets);

unsigned computeReachabilitySetsIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram computeReachabilitySetsIterationTable(computeReachabilitySetsIterationBuckets);

unsigned computeReachabilityIterationBuckets[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0};
Histogram computeReachabilityIterationTable(computeReachabilityIterationBuckets);

#endif // COUNT_BASIC_BLOCKS

/*****************************************************************************
Expand Down Expand Up @@ -1560,6 +1569,25 @@ void Compiler::compShutdown()
fprintf(fout, "--------------------------------------------------\n");
bbOneBBSizeTable.dump(fout);
fprintf(fout, "--------------------------------------------------\n");

fprintf(fout, "--------------------------------------------------\n");
fprintf(fout, "fgComputeDoms `while (change)` iterations:\n");
fprintf(fout, "--------------------------------------------------\n");
domsChangedIterationTable.dump(fout);
fprintf(fout, "--------------------------------------------------\n");

fprintf(fout, "--------------------------------------------------\n");
fprintf(fout, "fgComputeReachabilitySets `while (change)` iterations:\n");
fprintf(fout, "--------------------------------------------------\n");
computeReachabilitySetsIterationTable.dump(fout);
fprintf(fout, "--------------------------------------------------\n");

fprintf(fout, "--------------------------------------------------\n");
fprintf(fout, "fgComputeReachability `while (change)` iterations:\n");
fprintf(fout, "--------------------------------------------------\n");
computeReachabilityIterationTable.dump(fout);
fprintf(fout, "--------------------------------------------------\n");

#endif // COUNT_BASIC_BLOCKS

#if COUNT_LOOPS
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -11662,6 +11662,9 @@ extern size_t gcPtrMapNSize;
#if COUNT_BASIC_BLOCKS
extern Histogram bbCntTable;
extern Histogram bbOneBBSizeTable;
extern Histogram domsChangedIterationTable;
extern Histogram computeReachabilitySetsIterationTable;
extern Histogram computeReachabilityIterationTable;
#endif

/*****************************************************************************
Expand Down
72 changes: 40 additions & 32 deletions src/coreclr/jit/fgopt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ void Compiler::fgUpdateChangedFlowGraph(FlowGraphUpdates updates)
JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n");
fgRenumberBlocks();
fgComputeEnterBlocksSet();
fgDfsReversePostorder();
fgComputeReachabilitySets();
if (computeDoms)
{
Expand All @@ -219,18 +220,18 @@ void Compiler::fgUpdateChangedFlowGraph(FlowGraphUpdates updates)
//
// This also sets the BBF_GC_SAFE_POINT flag on blocks.
//
// This depends on `fgBBReversePostorder` being correct.
//
// TODO-Throughput: This algorithm consumes O(n^2) because we're using dense bitsets to
// represent reachability. While this yields O(1) time queries, it bloats the memory usage
// for large code. We can do better if we try to approach reachability by
// computing the strongly connected components of the flow graph. That way we only need
// linear memory to label every block with its SCC.
//
// Assumptions:
// Assumes the predecessor lists are correct.
//
void Compiler::fgComputeReachabilitySets()
{
assert(fgPredsComputed);
assert(fgBBReversePostorder != nullptr);

#ifdef DEBUG
fgReachabilitySetsValid = false;
Expand All @@ -250,21 +251,21 @@ void Compiler::fgComputeReachabilitySets()
// Find the reachable blocks. Also, set BBF_GC_SAFE_POINT.

bool change;
BlockSet newReach(BlockSetOps::MakeEmpty(this));
unsigned changedIterCount = 1;
do
{
change = false;

for (BasicBlock* const block : Blocks())
for (unsigned i = 1; i <= fgBBNumMax; ++i)
{
BlockSetOps::Assign(this, newReach, block->bbReach);
BasicBlock* const block = fgBBReversePostorder[i];

bool predGcSafe = (block->bbPreds != nullptr); // Do all of our predecessor blocks have a GC safe bit?

for (BasicBlock* const predBlock : block->PredBlocks())
{
/* Union the predecessor's reachability set into newReach */
BlockSetOps::UnionD(this, newReach, predBlock->bbReach);
change |= BlockSetOps::UnionDChanged(this, block->bbReach, predBlock->bbReach);

if (!(predBlock->bbFlags & BBF_GC_SAFE_POINT))
{
Expand All @@ -276,15 +277,15 @@ void Compiler::fgComputeReachabilitySets()
{
block->bbFlags |= BBF_GC_SAFE_POINT;
}

if (!BlockSetOps::Equal(this, newReach, block->bbReach))
{
BlockSetOps::Assign(this, block->bbReach, newReach);
change = true;
}
}

++changedIterCount;
} while (change);

#if COUNT_BASIC_BLOCKS
computeReachabilitySetsIterationTable.record(changedIterCount);
#endif // COUNT_BASIC_BLOCKS

#ifdef DEBUG
if (verbose)
{
Expand Down Expand Up @@ -598,15 +599,11 @@ PhaseStatus Compiler::fgComputeReachability()
madeChanges |= fgRenumberBlocks();

//
// Compute fgEnterBlks
// Compute fgEnterBlks, reverse post-order, and bbReach.
//

fgComputeEnterBlocksSet();

//
// Compute bbReach
//

fgDfsReversePostorder();
fgComputeReachabilitySets();

//
Expand All @@ -618,6 +615,10 @@ PhaseStatus Compiler::fgComputeReachability()

} while (changed);

#if COUNT_BASIC_BLOCKS
computeReachabilityIterationTable.record(passNum - 1);
#endif // COUNT_BASIC_BLOCKS

//
// Now, compute the dominators
//
Expand Down Expand Up @@ -752,19 +753,24 @@ bool Compiler::fgRemoveDeadBlocks()
// preorder and reverse postorder numbers, plus a reverse postorder for blocks.
//
// Notes:
// Assumes caller has computed the fgEnterBlkSet.
// Assumes caller has computed the fgEnterBlks set.
//
// Each block's `bbPreorderNum` and `bbPostorderNum` is set.
//
// Assumes caller has allocated the fgBBReversePostorder array.
// It will be filled in with blocks in reverse post order.
// The `fgBBReversePostorder` array is filled in with the `BasicBlock*` in reverse post-order.
//
// This algorithm only pays attention to the actual blocks. It ignores any imaginary entry block.
//
void Compiler::fgDfsReversePostorder()
{
assert(fgBBcount == fgBBNumMax);
assert(BasicBlockBitSetTraits::GetSize(this) == fgBBNumMax + 1);

// Make sure fgEnterBlks are still there in startNodes, even if they participate in a loop (i.e., there is
// an incoming edge into the block).
assert(fgEnterBlksSetValid);
assert(fgBBReversePostorder != nullptr);

fgBBReversePostorder = new (this, CMK_DominatorMemory) BasicBlock*[fgBBNumMax + 1]{};

// visited : Once we run the DFS post order sort recursive algorithm, we mark the nodes we visited to avoid
// backtracking.
Expand Down Expand Up @@ -989,13 +995,6 @@ void Compiler::fgComputeDoms()
assert(BasicBlockBitSetTraits::GetSize(this) == fgBBNumMax + 1);
#endif // DEBUG

BlockSet processedBlks(BlockSetOps::MakeEmpty(this));

fgBBReversePostorder = new (this, CMK_DominatorMemory) BasicBlock*[fgBBNumMax + 1]{};

fgDfsReversePostorder();
noway_assert(fgBBReversePostorder[0] == nullptr);

// flRoot and bbRoot represent an imaginary unique entry point in the flow graph.
// All the orphaned EH blocks and fgFirstBB will temporarily have its predecessors list
// (with bbRoot as the only basic block in it) set as flRoot.
Expand All @@ -1012,9 +1011,11 @@ void Compiler::fgComputeDoms()

FlowEdge flRoot(&bbRoot, nullptr);

noway_assert(fgBBReversePostorder[0] == nullptr);
fgBBReversePostorder[0] = &bbRoot;

// Mark both bbRoot and fgFirstBB processed
BlockSet processedBlks(BlockSetOps::MakeEmpty(this));
BlockSetOps::AddElemD(this, processedBlks, 0); // bbRoot == block #0
BlockSetOps::AddElemD(this, processedBlks, 1); // fgFirstBB == block #1
assert(fgFirstBB->bbNum == 1);
Expand Down Expand Up @@ -1057,7 +1058,8 @@ void Compiler::fgComputeDoms()
}

// Now proceed to compute the immediate dominators for each basic block.
bool changed = true;
bool changed = true;
unsigned changedIterCount = 1;
while (changed)
{
changed = false;
Expand Down Expand Up @@ -1116,8 +1118,14 @@ void Compiler::fgComputeDoms()
}
BlockSetOps::AddElemD(this, processedBlks, block->bbNum);
}

++changedIterCount;
}

#if COUNT_BASIC_BLOCKS
domsChangedIterationTable.record(changedIterCount);
#endif // COUNT_BASIC_BLOCKS

// As stated before, once we have computed immediate dominance we need to clear
// all the basic blocks whose predecessor list was set to flRoot. This
// reverts that and leaves the blocks the same as before.
Expand Down Expand Up @@ -1311,7 +1319,7 @@ void Compiler::fgNumberDomTree(DomTreeNode* domTree)
// fgIntersectDom: Intersect two immediate dominator sets.
//
// Find the lowest common ancestor in the dominator tree between two basic blocks. The LCA in the dominance tree
// represents the closest dominator between the two basic blocks. Used to adjust the IDom value in fgComputDoms.
// represents the closest dominator between the two basic blocks. Used to adjust the IDom value in fgComputeDoms.
//
// Arguments:
// a, b - two blocks to intersect
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/fgprofilesynthesis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -763,7 +763,6 @@ void ProfileSynthesis::RandomizeLikelihoods()
void ProfileSynthesis::BuildReversePostorder()
{
m_comp->EnsureBasicBlockEpoch();
m_comp->fgBBReversePostorder = new (m_comp, CMK_Pgo) BasicBlock*[m_comp->fgBBNumMax + 1]{};
m_comp->fgComputeEnterBlocksSet();
m_comp->fgDfsReversePostorder();

Expand Down