Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Hide 'align' instruction behind jmp #60787

Merged
merged 24 commits into from
Nov 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
3244072
Hide align behind a jmp
kunalspathak Oct 8, 2021
e7c0710
Fix a problem where curIG==0 and loop might be emitted in curIG, adju…
kunalspathak Oct 22, 2021
bd922aa
Add stress mode to emit int3 for xarch
kunalspathak Oct 22, 2021
4d0f912
Add stress mode to emit bkpt for arm64
kunalspathak Oct 22, 2021
8d64351
Add a loop align instruction placement phase
kunalspathak Oct 29, 2021
9b9b616
review comments
kunalspathak Oct 29, 2021
6302975
Change from unsigned short to unsigned
kunalspathak Oct 29, 2021
d20da6d
review comments around cleanup
kunalspathak Nov 10, 2021
c6a2d70
emitForceNewIG
kunalspathak Nov 10, 2021
e9c5eec
Remove emitPrevIG
kunalspathak Nov 10, 2021
c1c5db3
Revert change to forceNewIG for align instruction
kunalspathak Nov 10, 2021
b8a9742
Use loopAlignCandidates
kunalspathak Nov 11, 2021
db98ec2
Use loopHeadIG reference
kunalspathak Nov 11, 2021
5ab9edc
jit format
kunalspathak Nov 11, 2021
c8a9e01
Remove unneeded method
kunalspathak Nov 11, 2021
5bb1563
Misc changes
kunalspathak Nov 11, 2021
2c6e81d
Review feedback
kunalspathak Nov 12, 2021
bbc2ac5
Do not include align behind Jmp in PerfScore calculation
kunalspathak Nov 13, 2021
64bba41
jit format and fix a bug
kunalspathak Nov 15, 2021
1e24fcb
fix the loopCandidates == 0 scenario
kunalspathak Nov 15, 2021
b301fa5
Add unmarkLoopAlign(), add check for fgFirstBB
kunalspathak Nov 16, 2021
57759d0
merge conflict fix
kunalspathak Nov 16, 2021
ef0e859
Add missing }
kunalspathak Nov 16, 2021
976b253
Grammar nit
kunalspathak Nov 18, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions src/coreclr/jit/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,3 +1662,22 @@ BBswtDesc::BBswtDesc(Compiler* comp, const BBswtDesc* other)
bbsDstTab[i] = other->bbsDstTab[i];
}
}

//------------------------------------------------------------------------
// unmarkLoopAlign: Clears the BBF_LOOP_ALIGN flag from the block and reduces
//                  the compiler's loop alignment candidate count.
//
// Arguments:
//    compiler - Compiler instance whose 'loopAlignCandidates' is decremented
//    reason   - Reason to print in JITDUMP
//
// Notes:
//    If the block is not currently marked with BBF_LOOP_ALIGN this is a
//    no-op, so the candidate count is decremented at most once per marked block.
//
void BasicBlock::unmarkLoopAlign(Compiler* compiler DEBUG_ARG(const char* reason))
{
    // Make sure we unmark and count just once.
    if (isLoopAlign())
    {
        compiler->loopAlignCandidates--;
        bbFlags &= ~BBF_LOOP_ALIGN;

        // Terminate the message with a newline so that it does not run into
        // the next line of the JIT dump.
        JITDUMP("Unmarking LOOP_ALIGN from " FMT_BB ". Reason= %s.\n", bbNum, reason);
    }
}
9 changes: 9 additions & 0 deletions src/coreclr/jit/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ enum BasicBlockFlags : unsigned __int64
BBF_PATCHPOINT = MAKE_BBFLAG(36), // Block is a patchpoint
BBF_HAS_CLASS_PROFILE = MAKE_BBFLAG(37), // BB contains a call needing a class profile
BBF_PARTIAL_COMPILATION_PATCHPOINT = MAKE_BBFLAG(38), // Block is a partial compilation patchpoint
BBF_HAS_ALIGN = MAKE_BBFLAG(39), // BB ends with 'align' instruction

// The following are sets of flags.

Expand Down Expand Up @@ -653,11 +654,19 @@ struct BasicBlock : private LIR::Range
{
return ((bbFlags & BBF_LOOP_HEAD) != 0);
}

// Returns true when BBF_LOOP_ALIGN is set in this block's flags.
bool isLoopAlign() const
{
    const bool loopAlignBitSet = (bbFlags & BBF_LOOP_ALIGN) != 0;
    return loopAlignBitSet;
}

void unmarkLoopAlign(Compiler* comp DEBUG_ARG(const char* reason));

// Returns true when BBF_HAS_ALIGN is set in this block's flags, i.e. the
// block ends with an 'align' instruction.
bool hasAlign() const
{
    const bool hasAlignBitSet = (bbFlags & BBF_HAS_ALIGN) != 0;
    return hasAlignBitSet;
}

#ifdef DEBUG
void dspFlags(); // Print the flags
unsigned dspCheapPreds(); // Print the predecessors (bbCheapPreds)
Expand Down
31 changes: 20 additions & 11 deletions src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ void CodeGen::genCodeForBBlist()

for (block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
{

#ifdef DEBUG
if (compiler->verbose)
{
Expand Down Expand Up @@ -782,21 +783,29 @@ void CodeGen::genCodeForBBlist()
}

#if FEATURE_LOOP_ALIGN
if (block->hasAlign())
{
// If this block ends with an 'align' instruction (identified by BBF_HAS_ALIGN),
// then we need to add the align instruction in the current "block".
//
// For non-adaptive alignment, add alignment instruction of size depending on the
// compJitAlignLoopBoundary.
// For adaptive alignment, alignment instruction will always be of 15 bytes for xarch
// and 16 bytes for arm64.
assert(ShouldAlignLoops());

// If next block is the first block of a loop (identified by BBF_LOOP_ALIGN),
// then need to add align instruction in current "block". Also mark the
// corresponding IG with IGF_LOOP_ALIGN to know that there will be align
// instructions at the end of that IG.
//
// For non-adaptive alignment, add alignment instruction of size depending on the
// compJitAlignLoopBoundary.
// For adaptive alignment, alignment instruction will always be of 15 bytes.
GetEmitter()->emitLoopAlignment(DEBUG_ARG1(block->bbJumpKind == BBJ_ALWAYS));
}

if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign()))
{
assert(ShouldAlignLoops());

GetEmitter()->emitLoopAlignment();
if (compiler->opts.compJitHideAlignBehindJmp)
{
// The current IG is the one that is just before the IG having loop start.
// Establish a connection of recent align instruction emitted to the loop
// it actually is aligning using 'idaLoopHeadPredIG'.
GetEmitter()->emitConnectAlignInstrWithCurIG();
}
}
#endif

Expand Down
83 changes: 83 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2547,11 +2547,13 @@ void Compiler::compInitOptions(JitFlags* jitFlags)

opts.compJitAlignLoopForJcc = JitConfig.JitAlignLoopForJcc() == 1;
opts.compJitAlignLoopMaxCodeSize = (unsigned short)JitConfig.JitAlignLoopMaxCodeSize();
opts.compJitHideAlignBehindJmp = JitConfig.JitHideAlignBehindJmp() == 1;
#else
opts.compJitAlignLoopAdaptive = true;
opts.compJitAlignLoopBoundary = DEFAULT_ALIGN_LOOP_BOUNDARY;
opts.compJitAlignLoopMinBlockWeight = DEFAULT_ALIGN_LOOP_MIN_BLOCK_WEIGHT;
opts.compJitAlignLoopMaxCodeSize = DEFAULT_MAX_LOOPSIZE_FOR_ALIGN;
opts.compJitHideAlignBehindJmp = true;
#endif

#ifdef TARGET_XARCH
Expand Down Expand Up @@ -5152,6 +5154,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
fgDebugCheckLinks();
#endif

#if FEATURE_LOOP_ALIGN
// Place loop alignment instructions
DoPhase(this, PHASE_ALIGN_LOOPS, &Compiler::placeLoopAlignInstructions);
#endif

// Generate code
codeGen->genGenerateCode(methodCodePtr, methodCodeSize);

Expand Down Expand Up @@ -5208,6 +5215,82 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
#endif // FUNC_INFO_LOGGING
}

#if FEATURE_LOOP_ALIGN

//------------------------------------------------------------------------
// placeLoopAlignInstructions: Iterate over all the blocks and determine
//    the best position to place the 'align' instruction. Inserting 'align'
//    instructions after an unconditional branch is preferred over inserting
//    in the block before the loop. In case there are multiple blocks
//    having 'jmp', the one that has lower weight is preferred.
//    If the block having 'jmp' is hotter than the block before the loop,
//    the align will still be placed after 'jmp' because the processor should
//    be smart enough to not fetch extra instruction beyond jmp.
//
// Notes:
//    The chosen block is marked with BBF_HAS_ALIGN. The search for a 'jmp'
//    block restarts after every loop that gets an align position, so only
//    BBJ_ALWAYS blocks seen since the previously handled loop are candidates.
//    BBJ_ALWAYS blocks are only considered when opts.compJitHideAlignBehindJmp
//    is set.
//
void Compiler::placeLoopAlignInstructions()
{
    // Add align only if there were any loops that needed alignment
    if (loopAlignCandidates == 0)
    {
        return;
    }

    int loopsToProcess = loopAlignCandidates;

    // Lowest-weight block ending in an unconditional jump seen since the
    // last loop was handled, along with its weight.
    weight_t    minBlockSoFar = BB_MAX_WEIGHT;
    BasicBlock* bbHavingAlign = nullptr;

    for (BasicBlock* const block : Blocks())
    {
        if ((block == fgFirstBB) && block->isLoopAlign())
        {
            // Adding align instruction in prolog is not supported, hence the
            // loop that starts at the first block does not get an 'align'.
            // Note that we do not skip the rest of the processing for the first
            // block: it can still host the 'align' of a loop that starts at the
            // very next block.
            if (--loopsToProcess == 0)
            {
                break;
            }
        }

        // If there is an unconditional jump
        if (opts.compJitHideAlignBehindJmp && (block->bbJumpKind == BBJ_ALWAYS))
        {
            if (block->bbWeight < minBlockSoFar)
            {
                minBlockSoFar = block->bbWeight;
                bbHavingAlign = block;
                JITDUMP(FMT_BB ", bbWeight=" FMT_WT " ends with unconditional 'jmp' \n", block->bbNum, block->bbWeight);
            }
        }

        if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign()))
        {
            // If jmp was not found, then block before the loop start is where align instruction will be added.
            if (bbHavingAlign == nullptr)
            {
                bbHavingAlign = block;
                JITDUMP("Marking " FMT_BB " before the loop with BBF_HAS_ALIGN for loop at " FMT_BB "\n", block->bbNum,
                        block->bbNext->bbNum);
            }
            else
            {
                JITDUMP("Marking " FMT_BB " that ends with unconditional jump with BBF_HAS_ALIGN for loop at " FMT_BB
                        "\n",
                        bbHavingAlign->bbNum, block->bbNext->bbNum);
            }

            bbHavingAlign->bbFlags |= BBF_HAS_ALIGN;

            // Start a fresh search for the next loop.
            minBlockSoFar = BB_MAX_WEIGHT;
            bbHavingAlign = nullptr;

            if (--loopsToProcess == 0)
            {
                break;
            }
        }
    }

    // Every loop counted in loopAlignCandidates must have been accounted for.
    assert(loopsToProcess == 0);
}
#endif

//------------------------------------------------------------------------
// generatePatchpointInfo: allocate and fill in patchpoint info data,
// and report it to the VM
Expand Down
14 changes: 9 additions & 5 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3666,6 +3666,7 @@ class Compiler
#endif

BasicBlock* bbNewBasicBlock(BBjumpKinds jumpKind);
void placeLoopAlignInstructions();

/*
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
Expand Down Expand Up @@ -6871,13 +6872,13 @@ class Compiler
bool fgHasLoops; // True if this method has any loops, set in fgComputeReachability

public:
LoopDsc* optLoopTable; // loop descriptor table
unsigned char optLoopCount; // number of tracked loops
LoopDsc* optLoopTable; // loop descriptor table
unsigned char optLoopCount; // number of tracked loops
unsigned char loopAlignCandidates; // number of loops identified for alignment

#ifdef DEBUG
unsigned char loopAlignCandidates; // number of loops identified for alignment
unsigned char loopsAligned; // number of loops actually aligned
#endif // DEBUG
unsigned char loopsAligned; // number of loops actually aligned
#endif // DEBUG

bool optRecordLoop(BasicBlock* head,
BasicBlock* top,
Expand Down Expand Up @@ -9687,6 +9688,9 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// If set, perform adaptive loop alignment that limits number of padding based on loop size.
bool compJitAlignLoopAdaptive;

// If set, tries to hide alignment instructions behind unconditional jumps.
bool compJitHideAlignBehindJmp;

#ifdef LATE_DISASM
bool doLateDisasm; // Run the late disassembler
#endif // LATE_DISASM
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ CompPhaseNameMacro(PHASE_INSERT_GC_POLLS, "Insert GC Polls",
CompPhaseNameMacro(PHASE_DETERMINE_FIRST_COLD_BLOCK, "Determine first cold block", "COLD-BLK", false, -1, true)
CompPhaseNameMacro(PHASE_RATIONALIZE, "Rationalize IR", "RAT", false, -1, false)
CompPhaseNameMacro(PHASE_SIMPLE_LOWERING, "Do 'simple' lowering", "SMP-LWR", false, -1, false)
CompPhaseNameMacro(PHASE_ALIGN_LOOPS, "Place 'align' instructions", "LOOP-ALIGN", false, -1, false)

CompPhaseNameMacro(PHASE_LCLVARLIVENESS, "Local var liveness", "LIVENESS", true, -1, false)
CompPhaseNameMacro(PHASE_LCLVARLIVENESS_INIT, "Local var liveness init", "LIV-INIT", false, PHASE_LCLVARLIVENESS, false)
Expand Down
Loading