diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 0a7e0d930c3a6..00dfb10b8072b 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3186,15 +3186,18 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compReloc = jitFlags->IsSet(JitFlags::JIT_FLAG_RELOC); + bool enableFakeSplitting = false; + #ifdef DEBUG + enableFakeSplitting = JitConfig.JitFakeProcedureSplitting(); + #if defined(TARGET_XARCH) // Whether encoding of absolute addr as PC-rel offset is enabled opts.compEnablePCRelAddr = (JitConfig.EnablePCRelAddr() != 0); #endif #endif // DEBUG - opts.compProcedureSplitting = - jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || JitConfig.JitFakeProcedureSplitting(); + opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting; #ifdef TARGET_ARM64 // TODO-ARM64-NYI: enable hot/cold splitting @@ -3208,7 +3211,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) if (opts.compProcedureSplitting) { // Note that opts.compdbgCode is true under ngen for checked assemblies! - opts.compProcedureSplitting = !opts.compDbgCode; + opts.compProcedureSplitting = !opts.compDbgCode || enableFakeSplitting; #ifdef DEBUG // JitForceProcedureSplitting is used to force procedure splitting on checked assemblies. @@ -3236,13 +3239,8 @@ void Compiler::compInitOptions(JitFlags* jitFlags) #endif } - // JitFakeProcedureSplitting overrides JitNoProcedureSplitting with a fake splitting implementation - if (JitConfig.JitFakeProcedureSplitting()) - { - opts.compProcedureSplitting = true; - } - #ifdef DEBUG + // Now, set compMaxUncheckedOffsetForNullObject for STRESS_NULL_OBJECT_CHECK if (compStressCompile(STRESS_NULL_OBJECT_CHECK, 30)) { @@ -5192,6 +5190,9 @@ void Compiler::placeLoopAlignInstructions() if ((block->bbNext != nullptr) && (block->bbNext->isLoopAlign())) { + // Loop alignment is disabled for cold blocks + assert((block->bbFlags & BBF_COLD) == 0); + // If jmp was not found, then block before the loop start is where align instruction will be added. if (bbHavingAlign == nullptr) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 74b059b1c6506..599583afdeb53 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7613,7 +7613,7 @@ class Compiler // ICorJitInfo wrappers - void eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest); + void eeAllocMem(AllocMemArgs* args); void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize); diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index cc2572e890473..c7ac7b32e5e99 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1122,40 +1122,52 @@ void Compiler::eeDispLineInfos() * (e.g., host AMD64, target ARM64), then VM will get confused anyway. */ -void Compiler::eeAllocMem(AllocMemArgs* args, UNATIVE_OFFSET hotSizeRequest, UNATIVE_OFFSET coldSizeRequest) +void Compiler::eeAllocMem(AllocMemArgs* args) { +#ifdef DEBUG // Fake splitting implementation: hot section = hot code + 4K buffer + cold code - const UNATIVE_OFFSET buffer = 4096; + const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize; + const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize; + const UNATIVE_OFFSET fakeSplittingBuffer = 4096; + if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) { - args->hotCodeSize = hotSizeRequest + buffer + coldSizeRequest; + args->hotCodeSize = hotSizeRequest + fakeSplittingBuffer + coldSizeRequest; args->coldCodeSize = 0; } +#endif info.compCompHnd->allocMem(args); - // Fix up hot/cold code pointers +#ifdef DEBUG if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) { - args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + buffer; - args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + buffer; + // Fix up hot/cold code pointers + args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest + fakeSplittingBuffer; + args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest + fakeSplittingBuffer; + + // Reset args' hot/cold code sizes in case caller reads them later + args->hotCodeSize = hotSizeRequest; + args->coldCodeSize = coldSizeRequest; } +#endif } void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize) { - // Fake splitting currently does not handle unwind info for cold code - if (isColdCode && JitConfig.JitFakeProcedureSplitting()) - { - return; - } - #ifdef DEBUG if (verbose) { printf("reserveUnwindInfo(isFunclet=%s, isColdCode=%s, unwindSize=0x%x)\n", isFunclet ? "true" : "false", isColdCode ? "true" : "false", unwindSize); } + + // Fake splitting currently does not handle unwind info for cold code + if (isColdCode && JitConfig.JitFakeProcedureSplitting()) + { + JITDUMP("reserveUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n"); + return; + } #endif // DEBUG if (info.compMatchedVM) @@ -1172,12 +1184,6 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode, BYTE* pUnwindBlock, CorJitFuncKind funcKind) { - // Fake splitting currently does not handle unwind info for cold code - if (pColdCode && JitConfig.JitFakeProcedureSplitting()) - { - return; - } - #ifdef DEBUG if (verbose) { @@ -1201,6 +1207,13 @@ void Compiler::eeAllocUnwindInfo(BYTE* pHotCode, } printf(")\n"); } + + // Fake splitting currently does not handle unwind info for cold code + if (pColdCode && JitConfig.JitFakeProcedureSplitting()) + { + JITDUMP("allocUnwindInfo for cold code with JitFakeProcedureSplitting enabled: ignoring cold unwind info\n"); + return; + } #endif // DEBUG if (info.compMatchedVM) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index 5f040b4c21b32..10c5c096f8934 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6065,7 +6065,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); + emitComp->eeAllocMem(&args); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; @@ -6083,7 +6083,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args, emitTotalHotCodeSize, emitTotalColdCodeSize); + emitComp->eeAllocMem(&args); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index a243711cb1c3d..6caaf338ffea9 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -5963,9 +5963,10 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication) (bNext != nullptr) && // block is not the last block (bNext->bbRefs == 1) && // No other block jumps to bNext (bNext->bbJumpKind == BBJ_ALWAYS) && // The next block is a BBJ_ALWAYS block - bNext->isEmpty() && // and it is an an empty block + bNext->isEmpty() && // and it is an empty block (bNext != bNext->bbJumpDest) && // special case for self jumps - (bDest != fgFirstColdBlock)) + (bDest != fgFirstColdBlock) && + (!fgInDifferentRegions(block, bDest))) // do not cross hot/cold sections { // case (a) // diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 7782c35597ed7..3414bdd0eaea3 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -3414,49 +3414,64 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() BasicBlock* block; BasicBlock* lblk; - for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) - { - bool blockMustBeInHotSection = false; + bool forceSplit = false; -#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION - if (bbIsHandlerBeg(block)) - { - blockMustBeInHotSection = true; - } -#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION +#ifdef DEBUG + // If stress-splitting, split right after the first block; don't handle functions with EH + forceSplit = JitConfig.JitStressProcedureSplitting() && (compHndBBtabCount == 0); +#endif - // Do we have a candidate for the first cold block? - if (firstColdBlock != nullptr) + if (forceSplit) + { + firstColdBlock = fgFirstBB->bbNext; + prevToFirstColdBlock = fgFirstBB; + } + else + { + for (lblk = nullptr, block = fgFirstBB; block != nullptr; lblk = block, block = block->bbNext) { - // We have a candidate for first cold block + bool blockMustBeInHotSection = false; - // Is this a hot block? - if (blockMustBeInHotSection || (block->isRunRarely() == false)) +#if HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION + if (bbIsHandlerBeg(block)) { - // We have to restart the search for the first cold block - firstColdBlock = nullptr; - prevToFirstColdBlock = nullptr; + blockMustBeInHotSection = true; } - } - else // (firstColdBlock == NULL) - { - // We don't have a candidate for first cold block +#endif // HANDLER_ENTRY_MUST_BE_IN_HOT_SECTION - // Is this a cold block? - if (!blockMustBeInHotSection && (block->isRunRarely() == true)) + // Do we have a candidate for the first cold block? + if (firstColdBlock != nullptr) { - // - // If the last block that was hot was a BBJ_COND - // then we will have to add an unconditional jump - // so the code size for block needs be large - // enough to make it worth our while - // - if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8)) + // We have a candidate for first cold block + + // Is this a hot block? + if (blockMustBeInHotSection || (block->isRunRarely() == false)) { - // This block is now a candidate for first cold block - // Also remember the predecessor to this block - firstColdBlock = block; - prevToFirstColdBlock = lblk; + // We have to restart the search for the first cold block + firstColdBlock = nullptr; + prevToFirstColdBlock = nullptr; + } + } + else // (firstColdBlock == NULL) + { + // We don't have a candidate for first cold block + + // Is this a cold block? + if (!blockMustBeInHotSection && (block->isRunRarely() == true)) + { + // + // If the last block that was hot was a BBJ_COND + // then we will have to add an unconditional jump + // so the code size for block needs be large + // enough to make it worth our while + // + if ((lblk == nullptr) || (lblk->bbJumpKind != BBJ_COND) || (fgGetCodeEstimate(block) >= 8)) + { + // This block is now a candidate for first cold block + // Also remember the predecessor to this block + firstColdBlock = block; + prevToFirstColdBlock = lblk; + } } } } @@ -3483,8 +3498,9 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() // then it may not be worth it to move it // into the Cold section as a jump to the // Cold section is 5 bytes in size. + // Ignore if stress-splitting. // - if (firstColdBlock->bbNext == nullptr) + if (!forceSplit && firstColdBlock->bbNext == nullptr) { // If the size of the cold block is 7 or less // then we will keep it in the Hot section. @@ -3557,6 +3573,7 @@ PhaseStatus Compiler::fgDetermineFirstColdBlock() for (block = firstColdBlock; block != nullptr; block = block->bbNext) { block->bbFlags |= BBF_COLD; + block->unmarkLoopAlign(this DEBUG_ARG("Loop alignment disabled for cold blocks")); } EXIT:; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 2cc1edee74fe7..29e977cf1d8c9 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -164,6 +164,9 @@ CONFIG_INTEGER(JitStressBiasedCSE, W("JitStressBiasedCSE"), 0x101) // Intern // stress. CONFIG_INTEGER(JitStressModeNamesOnly, W("JitStressModeNamesOnly"), 0) // Internal Jit stress: if nonzero, only enable // stress modes listed in JitStressModeNames +CONFIG_INTEGER(JitStressProcedureSplitting, W("JitStressProcedureSplitting"), 0) // Always split after the first basic + // block. Skips functions with EH + // for simplicity. CONFIG_INTEGER(JitStressRegs, W("JitStressRegs"), 0) CONFIG_STRING(JitStressRegsRange, W("JitStressRegsRange")) // Only apply JitStressRegs to methods in this hash range @@ -196,7 +199,8 @@ CONFIG_INTEGER(JitFakeProcedureSplitting, W("JitFakeProcedureSplitting"), 0) // // For now, this disables unwind info for // cold sections, breaking stack walks. // Set COMPlus_GCgen0size=1000000 to avoid - // running the GC and breaking things. + // running the GC, which requires + // stack-walking. CONFIG_METHODSET(JitForceProcedureSplitting, W("JitForceProcedureSplitting")) CONFIG_METHODSET(JitGCDump, W("JitGCDump")) CONFIG_METHODSET(JitDebugDump, W("JitDebugDump"))