From bc1a8722f6ade50fae6162708cea7c0b6b5d23fe Mon Sep 17 00:00:00 2001 From: Aman Khalid Date: Wed, 22 Jun 2022 13:53:14 -0700 Subject: [PATCH] Enable fake hot/cold splitting on ARM64 (#70708) This commit contains fixes for various bugs exposed by enabling fake hot/cold splitting on ARM64: - Branches between hot/cold sections are now always long. - The pseudoinstruction for loading a constant from the cold section did not support loading 16-byte data into vector registers, as it temporarily loaded the constant into an 8-byte integer register. Now, 16-byte constants are loaded directly into vector registers via an `ld1` instruction. - Asserts/NYIs blocking hot/cold splitting on ARM64 have been removed. Fake hot/cold splitting requires we fake unwind info by treating each split function as one hot section. A more architecture-agnostic approach for this has been applied. To facilitate this approach, the fake-splitting implementation has been revised to place the hot and cold sections contiguously in memory (immediately followed by the read-only data section on ARM64). --- src/coreclr/jit/compiler.cpp | 6 +- src/coreclr/jit/compiler.h | 6 +- src/coreclr/jit/ee_il_dll.cpp | 62 ++++++-- src/coreclr/jit/emit.cpp | 39 +---- src/coreclr/jit/emit.h | 2 +- src/coreclr/jit/emitarm64.cpp | 264 +++++++++++++++++++++----------- src/coreclr/jit/unwind.cpp | 18 ++- src/coreclr/jit/unwindamd64.cpp | 67 +++----- src/coreclr/jit/unwindarm.cpp | 45 ++++-- src/coreclr/jit/unwindx86.cpp | 63 +++----- 10 files changed, 330 insertions(+), 242 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 0ac148774aaf6..460412d84597e 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -3199,10 +3199,10 @@ void Compiler::compInitOptions(JitFlags* jitFlags) opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting; -#ifdef TARGET_ARM64 - // TODO-ARM64-NYI: enable hot/cold splitting +#ifdef TARGET_LOONGARCH64 + // Hot/cold splitting is not being tested on LoongArch64. opts.compProcedureSplitting = false; -#endif // TARGET_ARM64 +#endif // TARGET_LOONGARCH64 #ifdef DEBUG opts.compProcedureSplittingEH = opts.compProcedureSplitting; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index df9fcec6a78ab..b5bfdf926f212 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7660,7 +7660,7 @@ class Compiler // ICorJitInfo wrappers - void eeAllocMem(AllocMemArgs* args); + void eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment); void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize); @@ -8017,10 +8017,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode); void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode); -#ifdef DEBUG - void fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode); -#endif // DEBUG - #endif // TARGET_AMD64 || (TARGET_X86 && FEATURE_EH_FUNCLETS) UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func); diff --git a/src/coreclr/jit/ee_il_dll.cpp b/src/coreclr/jit/ee_il_dll.cpp index f8c437e326694..d09bffa0a5e9a 100644 --- a/src/coreclr/jit/ee_il_dll.cpp +++ b/src/coreclr/jit/ee_il_dll.cpp @@ -1122,34 +1122,64 @@ void Compiler::eeDispLineInfos() * (e.g., host AMD64, target ARM64), then VM will get confused anyway. 
*/ -void Compiler::eeAllocMem(AllocMemArgs* args) +void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment) { #ifdef DEBUG - const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize; - const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize; - // Fake splitting implementation: place hot/cold code in contiguous section - if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) + // Fake splitting implementation: place hot/cold code in contiguous section. + UNATIVE_OFFSET coldCodeOffset = 0; + if (JitConfig.JitFakeProcedureSplitting() && (args->coldCodeSize > 0)) { - args->hotCodeSize = hotSizeRequest + coldSizeRequest; + coldCodeOffset = args->hotCodeSize; + assert(coldCodeOffset > 0); + args->hotCodeSize += args->coldCodeSize; args->coldCodeSize = 0; } -#endif + +#endif // DEBUG + +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + + // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does. + // This way allows us to use a single `ldr` to access such data like float constant/jmp table. + // For LoongArch64 using `pcaddi + ld` to access such data. + + UNATIVE_OFFSET roDataAlignmentDelta = 0; + if (args->roDataSize > 0) + { + roDataAlignmentDelta = AlignmentPad(args->hotCodeSize, roDataSectionAlignment); + } + + const UNATIVE_OFFSET roDataOffset = args->hotCodeSize + roDataAlignmentDelta; + args->hotCodeSize = roDataOffset + args->roDataSize; + args->roDataSize = 0; + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) info.compCompHnd->allocMem(args); #ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0)) - { - // Fix up hot/cold code pointers - args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest; - args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest; - // Reset args' hot/cold code sizes in case caller reads them later - args->hotCodeSize = hotSizeRequest; - args->coldCodeSize = coldSizeRequest; + if (JitConfig.JitFakeProcedureSplitting() && (coldCodeOffset > 0)) + { + // Fix up cold code pointers. Cold section is adjacent to hot section. + assert(args->coldCodeBlock == nullptr); + assert(args->coldCodeBlockRW == nullptr); + args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + coldCodeOffset; + args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + coldCodeOffset; } -#endif + +#endif // DEBUG + +#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) + + // Fix up data section pointers. + assert(args->roDataBlock == nullptr); + assert(args->roDataBlockRW == nullptr); + args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset; + args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset; + +#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) } void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index ac9bd8121aa4f..eb846e06a0dc6 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -4561,7 +4561,6 @@ void emitter::emitJumpDistBind() else if (emitIsUncondJump(jmp)) { // Nothing to do; we don't shrink these. 
- assert(jmp->idjShort); ssz = JMP_SIZE_SMALL; } else if (emitIsLoadLabel(jmp)) @@ -6350,47 +6349,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, AllocMemArgs args; memset(&args, 0, sizeof(args)); -#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) - // For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does. - // This way allows us to use a single `ldr` to access such data like float constant/jmp table. - // For LoongArch64 using `pcaddi + ld` to access such data. - if (emitTotalColdCodeSize > 0) - { - // JIT data might be far away from the cold code. - NYI("Need to handle fix-up to data from cold code."); - } - - UNATIVE_OFFSET roDataAlignmentDelta = 0; - if (emitConsDsc.dsdOffs > 0) - { - roDataAlignmentDelta = AlignmentPad(emitTotalHotCodeSize, dataAlignment); - } - - args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs; - args.coldCodeSize = emitTotalColdCodeSize; - args.roDataSize = 0; - args.xcptnsCount = xcptnsCount; - args.flag = allocMemFlag; - - emitComp->eeAllocMem(&args); - - codeBlock = (BYTE*)args.hotCodeBlock; - codeBlockRW = (BYTE*)args.hotCodeBlockRW; - coldCodeBlock = (BYTE*)args.coldCodeBlock; - coldCodeBlockRW = (BYTE*)args.coldCodeBlockRW; - - consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta; - consBlockRW = codeBlockRW + emitTotalHotCodeSize + roDataAlignmentDelta; - -#else - args.hotCodeSize = emitTotalHotCodeSize; args.coldCodeSize = emitTotalColdCodeSize; args.roDataSize = emitConsDsc.dsdOffs; args.xcptnsCount = xcptnsCount; args.flag = allocMemFlag; - emitComp->eeAllocMem(&args); + emitComp->eeAllocMem(&args, emitConsDsc.alignment); codeBlock = (BYTE*)args.hotCodeBlock; codeBlockRW = (BYTE*)args.hotCodeBlockRW; @@ -6399,8 +6364,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, consBlock = (BYTE*)args.roDataBlock; consBlockRW = (BYTE*)args.roDataBlockRW; -#endif - #ifdef DEBUG if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0) { diff --git a/src/coreclr/jit/emit.h b/src/coreclr/jit/emit.h index 053381eb8f027..d44fd1bd572ee 100644 --- a/src/coreclr/jit/emit.h +++ b/src/coreclr/jit/emit.h @@ -997,7 +997,7 @@ class emitter case IF_LARGELDC: if (isVectorRegister(idReg1())) { - // adrp + ldr + fmov + // (adrp + ldr + fmov) or (adrp + add + ld1) size = 12; } else diff --git a/src/coreclr/jit/emitarm64.cpp b/src/coreclr/jit/emitarm64.cpp index e60d8d6a69ee7..b0bf1769d1bd0 100644 --- a/src/coreclr/jit/emitarm64.cpp +++ b/src/coreclr/jit/emitarm64.cpp @@ -8438,10 +8438,12 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) switch (ins) { case INS_bl_local: + idjShort = true; + FALLTHROUGH; case INS_b: // Unconditional jump is a single form. - idjShort = true; - fmt = IF_BI_0A; + // Assume is long in case we cross hot/cold sections. + fmt = IF_BI_0A; break; case INS_beq: @@ -8486,7 +8488,6 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount) id->idAddr()->iiaBBlabel = dst; // Skip unconditional jump that has a single form. - // TODO-ARM64-NYI: enable hot/cold splittingNYI. // The target needs to be relocated. if (!idjShort) { @@ -9816,38 +9817,67 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) { // Update addrReg with the reserved integer register // since we cannot use dstReg (vector) to load constant directly from memory. - addrReg = id->idReg2(); + + // If loading a 16-byte value, we will need to load directly into dstReg. 
+ // Thus, encode addrReg for the ld1 instruction. + if (opSize == EA_16BYTE) + { + addrReg = encodingSPtoZR(id->idReg2()); + } + else + { + addrReg = id->idReg2(); + } + assert(isGeneralRegister(addrReg)); } + ins = INS_adrp; fmt = IF_DI_1E; dst = emitOutputShortAddress(dst, ins, fmt, relPageAddr, addrReg); - // ldr x, [x, page offs] -- load constant from page address + page offset into integer register. ssize_t imm12 = (ssize_t)dstAddr & 0xFFF; // 12 bits assert(isValidUimm12(imm12)); - ins = INS_ldr; - fmt = IF_LS_2B; - dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize); - // fmov v, d -- copy constant in integer register to vector register. - // This is needed only for vector constant. - if (addrReg != dstReg) + // Special case: emit add + ld1 instructions for loading 16-byte data into vector register. + if (isVectorRegister(dstReg) && (opSize == EA_16BYTE)) { - // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn - // (scalar, from general) - assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg)); - ins = INS_fmov; - fmt = IF_DV_2I; - code_t code = emitInsCode(ins, fmt); + const emitAttr elemSize = EA_1BYTE; + const insOpts opt = optMakeArrangement(opSize, elemSize); - code |= insEncodeReg_Vd(dstReg); // ddddd - code |= insEncodeReg_Rn(addrReg); // nnnnn - if (id->idOpSize() == EA_8BYTE) + assert(isGeneralRegisterOrSP(addrReg)); + assert(isValidVectorElemsize(elemSize)); + assert(isValidArrangement(opSize, opt)); + + // Calculate page addr + page offs, then emit ld1 instruction. + dst = emitOutputVectorConstant(dst, imm12, dstReg, addrReg, opSize, elemSize); + } + else + { + // ldr x, [x, 0] -- load constant from address into integer register. + ins = INS_ldr; + fmt = IF_LS_2B; + dst = emitOutputShortConstant(dst, ins, fmt, imm12, addrReg, opSize); + + // fmov v, d -- copy constant in integer register to vector register. + // This is needed only for vector constant. + if (addrReg != dstReg) { - code |= 0x80400000; // X ... X + // fmov Vd,Rn DV_2I X00111100X100111 000000nnnnnddddd 1E27 0000 Vd,Rn + // (scalar, from general) + assert(isVectorRegister(dstReg) && isGeneralRegister(addrReg)); + ins = INS_fmov; + fmt = IF_DV_2I; + code_t code = emitInsCode(ins, fmt); + + code |= insEncodeReg_Vd(dstReg); // ddddd + code |= insEncodeReg_Rn(addrReg); // nnnnn + if (id->idOpSize() == EA_8BYTE) + { + code |= 0x80400000; // X ... X + } + dst += emitOutput_Instr(dst, code); } - dst += emitOutput_Instr(dst, code); } } } @@ -9950,12 +9980,6 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) /* For forward jumps, record the address of the distance value */ id->idjTemp.idjAddr = (distVal > 0) ? dst : NULL; - if (emitJumpCrossHotColdBoundary(srcOffs, dstOffs)) - { - assert(!id->idjShort); - NYI_ARM64("Relocation Support for long address"); - } - assert(insOptsNone(id->idInsOpt())); if (isJump) @@ -9966,75 +9990,114 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) assert(!id->idjKeepLong); assert(emitJumpCrossHotColdBoundary(srcOffs, dstOffs) == false); assert((fmt == IF_BI_0A) || (fmt == IF_BI_0B) || (fmt == IF_BI_1A) || (fmt == IF_BI_1B)); + dst = emitOutputShortBranch(dst, ins, fmt, distVal, id); } else { - // Long conditional jump - assert(fmt == IF_LARGEJMP); - // This is a pseudo-instruction format representing a large conditional branch, to allow - // us to get a greater branch target range than we can get by using a straightforward conditional - // branch. 
It is encoded as a short conditional branch that branches around a long unconditional - // branch. - // - // Conceptually, we have: - // - // b L_target - // - // The code we emit is: - // - // b L_not // 4 bytes. Note that we reverse the condition. - // b L_target // 4 bytes - // L_not: - // - // Note that we don't actually insert any blocks: we simply encode "b L_not" as a branch with - // the correct offset. Note also that this works for both integer and floating-point conditions, because - // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example, - // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered). + // Long conditional/unconditional jump - instruction reverseIns; - insFormat reverseFmt; + if (fmt == IF_LARGEJMP) + { + // This is a pseudo-instruction format representing a large conditional branch, to allow + // us to get a greater branch target range than we can get by using a straightforward conditional + // branch. It is encoded as a short conditional branch that branches around a long unconditional + // branch. + // + // Conceptually, we have: + // + // b L_target + // + // The code we emit is: + // + // b L_not // 4 bytes. Note that we reverse the condition. + // b L_target // 4 bytes + // L_not: + // + // Note that we don't actually insert any blocks: we simply encode "b L_not" as a branch with + // the correct offset. Note also that this works for both integer and floating-point conditions, because + // the condition inversion takes ordered/unordered into account, preserving NaN behavior. For example, + // "GT" (greater than) is inverted to "LE" (less than, equal, or unordered). - switch (ins) + instruction reverseIns; + insFormat reverseFmt; + + switch (ins) + { + case INS_cbz: + reverseIns = INS_cbnz; + reverseFmt = IF_BI_1A; + break; + case INS_cbnz: + reverseIns = INS_cbz; + reverseFmt = IF_BI_1A; + break; + case INS_tbz: + reverseIns = INS_tbnz; + reverseFmt = IF_BI_1B; + break; + case INS_tbnz: + reverseIns = INS_tbz; + reverseFmt = IF_BI_1B; + break; + default: + reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); + reverseFmt = IF_BI_0B; + } + + dst = emitOutputShortBranch(dst, + reverseIns, // reverse the conditional instruction + reverseFmt, 8, /* 8 bytes from start of this large conditional + pseudo-instruction to L_not. */ + id); + + // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that. + ins = INS_b; + fmt = IF_BI_0A; + + // The distVal was computed based on the beginning of the pseudo-instruction, + // So subtract the size of the conditional branch so that it is relative to the + // unconditional branch. + distVal -= 4; + } + + assert(fmt == IF_BI_0A); + assert((distVal & 1) == 0); + code_t code = emitInsCode(ins, fmt); + const bool recordRelocation = emitComp->opts.compReloc && emitJumpCrossHotColdBoundary(srcOffs, dstOffs); + + if (recordRelocation) { - case INS_cbz: - reverseIns = INS_cbnz; - reverseFmt = IF_BI_1A; - break; - case INS_cbnz: - reverseIns = INS_cbz; - reverseFmt = IF_BI_1A; - break; - case INS_tbz: - reverseIns = INS_tbnz; - reverseFmt = IF_BI_1B; - break; - case INS_tbnz: - reverseIns = INS_tbz; - reverseFmt = IF_BI_1B; - break; - default: - reverseIns = emitJumpKindToIns(emitReverseJumpKind(emitInsToJumpKind(ins))); - reverseFmt = IF_BI_0B; + // dst isn't an actual final target location, just some intermediate + // location. 
Thus we cannot make any guarantees about distVal (not + // even the direction/sign). Instead we don't encode any offset and + // rely on the relocation to do all the work + } + else + { + // Branch offset encodings are scaled by 4. + noway_assert((distVal & 3) == 0); + distVal >>= 2; + noway_assert(isValidSimm26(distVal)); + + // Insert offset into unconditional branch instruction + distVal &= 0x3FFFFFFLL; + code |= distVal; } - dst = - emitOutputShortBranch(dst, - reverseIns, // reverse the conditional instruction - reverseFmt, - 8, /* 8 bytes from start of this large conditional pseudo-instruction to L_not. */ - id); + const unsigned instrSize = emitOutput_Instr(dst, code); - // Now, pretend we've got a normal unconditional branch, and fall through to the code to emit that. - ins = INS_b; - fmt = IF_BI_0A; + if (recordRelocation) + { + assert(id->idjKeepLong); + if (emitComp->info.compMatchedVM) + { + void* target = emitOffsetToPtr(dstOffs); + emitRecordRelocation((void*)dst, target, IMAGE_REL_ARM64_BRANCH26); + } + } - // The distVal was computed based on the beginning of the pseudo-instruction, - // So subtract the size of the conditional branch so that it is relative to the - // unconditional branch. - distVal -= 4; + dst += instrSize; } - - dst = emitOutputShortBranch(dst, ins, fmt, distVal, id); } else if (loadLabel) { @@ -10155,7 +10218,7 @@ BYTE* emitter::emitOutputShortConstant( ssize_t loBits = (imm & 3); noway_assert(loBits == 0); - ssize_t distVal = imm >>= 2; // load offset encodings are scaled by 4. + ssize_t distVal = imm >> 2; // load offset encodings are scaled by 4. noway_assert(isValidSimm19(distVal)); @@ -10223,6 +10286,33 @@ BYTE* emitter::emitOutputShortConstant( return dst; } + +/***************************************************************************** + * + * Output instructions to load a constant into a vector register. + */ +BYTE* emitter::emitOutputVectorConstant( + BYTE* dst, ssize_t imm, regNumber dstReg, regNumber addrReg, emitAttr opSize, emitAttr elemSize) +{ + // add addrReg, addrReg, page offs -- compute address = page addr + page offs. + code_t code = emitInsCode(INS_add, IF_DI_2A); // DI_2A X0010001shiiiiii iiiiiinnnnnddddd 1100 0000 imm(i12, sh) + code |= insEncodeDatasize(EA_8BYTE); // X - use EA_8BYTE, as we are calculating 64-bit address + code |= ((code_t)imm << 10); // iiiiiiiiiiii + code |= insEncodeReg_Rd(addrReg); // ddddd + code |= insEncodeReg_Rn(addrReg); // nnnnn + dst += emitOutput_Instr(dst, code); + + // ld1 dstReg, addrReg -- load constant at address in addrReg into dstReg. + code = emitInsCode(INS_ld1, IF_LS_2D); // LS_2D .Q.............. ....ssnnnnnttttt Vt Rn + code |= insEncodeVectorsize(opSize); // Q + code |= insEncodeVLSElemsize(elemSize); // ss + code |= insEncodeReg_Rn(addrReg); // nnnnn + code |= insEncodeReg_Vt(dstReg); // ttttt + dst += emitOutput_Instr(dst, code); + + return dst; +} + /***************************************************************************** * * Output a call instruction. 
diff --git a/src/coreclr/jit/unwind.cpp b/src/coreclr/jit/unwind.cpp index 6ad60a064f35c..63c4ed716cf39 100644 --- a/src/coreclr/jit/unwind.cpp +++ b/src/coreclr/jit/unwind.cpp @@ -69,7 +69,16 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc* func, // The hot section only goes up to the cold section assert(fgFirstFuncletBB == nullptr); - *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock)); +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting()) + { + *ppEndLoc = nullptr; // If fake-splitting, "trick" VM by pretending entire function is hot. + } + else +#endif // DEBUG + { + *ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock)); + } } else { @@ -259,6 +268,13 @@ void Compiler::unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdC DWORD unwindCodeBytes = 0; BYTE* pUnwindBlock = nullptr; +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting()) + { + pColdCode = nullptr; + } +#endif // DEBUG + if (func->startLoc == nullptr) { startOffset = 0; diff --git a/src/coreclr/jit/unwindamd64.cpp b/src/coreclr/jit/unwindamd64.cpp index 2c8e90fa5a944..88cefbe31ed5e 100644 --- a/src/coreclr/jit/unwindamd64.cpp +++ b/src/coreclr/jit/unwindamd64.cpp @@ -656,18 +656,17 @@ void Compiler::unwindReserve() // void Compiler::unwindReserveFunc(FuncInfoDsc* func) { -#ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr)) + unwindReserveFuncHelper(func, true); + + if (fgFirstColdBlock != nullptr) { - assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets. - unwindReserveFuncHelper(func, true); - } - else +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting()) + { + assert(func->funKind == FUNC_ROOT); // No splitting of funclets. + } + else #endif // DEBUG - { - unwindReserveFuncHelper(func, true); - - if (fgFirstColdBlock != nullptr) { unwindReserveFuncHelper(func, false); } @@ -859,7 +858,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo if (isHotCode) { - assert(endOffset <= info.compTotalHotCodeSize); +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr)) + { + assert(endOffset <= info.compNativeCodeSize); + } + else +#endif // DEBUG + { + assert(endOffset <= info.compTotalHotCodeSize); + } + pColdCode = nullptr; } else @@ -890,43 +899,17 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); -#ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr)) + unwindEmitFuncHelper(func, pHotCode, pColdCode, true); + + if (pColdCode != nullptr) { - fakeUnwindEmitFuncHelper(func, pHotCode); - } - else +#ifdef DEBUG + if (!JitConfig.JitFakeProcedureSplitting()) #endif // DEBUG - { - unwindEmitFuncHelper(func, pHotCode, pColdCode, true); - - if (pColdCode != nullptr) { unwindEmitFuncHelper(func, pHotCode, pColdCode, false); } } } -#ifdef DEBUG -void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode) -{ - assert(fgFirstColdBlock != nullptr); - assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets. 
- - const UNATIVE_OFFSET startOffset = 0; - const UNATIVE_OFFSET endOffset = info.compNativeCodeSize; - const DWORD unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot; - BYTE* pUnwindBlock = &func->unwindCodes[func->unwindCodeSlot]; - - if (opts.dspUnwind) - { - DumpUnwindInfo(true, startOffset, endOffset, (const UNWIND_INFO* const)pUnwindBlock); - } - - // Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section - eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, unwindCodeBytes, pUnwindBlock, - (CorJitFuncKind)func->funKind); -} -#endif // DEBUG - #endif // TARGET_AMD64 diff --git a/src/coreclr/jit/unwindarm.cpp b/src/coreclr/jit/unwindarm.cpp index 1eb7456250cbb..8a14c6edbb832 100644 --- a/src/coreclr/jit/unwindarm.cpp +++ b/src/coreclr/jit/unwindarm.cpp @@ -563,13 +563,20 @@ void Compiler::unwindReserve() void Compiler::unwindReserveFunc(FuncInfoDsc* func) { BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE; - bool funcHasColdSection = false; + bool funcHasColdSection = (fgFirstColdBlock != nullptr); + +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting() && funcHasColdSection) + { + funcHasColdSection = false; // "Trick" the VM into thinking we don't have a cold section. + } +#endif // DEBUG #if defined(FEATURE_CFI_SUPPORT) if (generateCFIUnwindCodes()) { DWORD unwindCodeBytes = 0; - if (fgFirstColdBlock != nullptr) + if (funcHasColdSection) { eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes); } @@ -584,7 +591,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) // cold section. This needs to be done before we split into fragments, as each // of the hot and cold sections can have multiple fragments. - if (fgFirstColdBlock != NULL) + if (funcHasColdSection) { assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH @@ -595,8 +602,6 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func) func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo(); func->uwiCold->InitUnwindInfo(this, startLoc, endLoc); func->uwiCold->HotColdSplitCodes(&func->uwi); - - funcHasColdSection = true; } // First we need to split the function or funclet into fragments that are no larger @@ -1604,11 +1609,19 @@ void UnwindFragmentInfo::Allocate( UNATIVE_OFFSET endOffset; UNATIVE_OFFSET codeSize; - // We don't support hot/cold splitting with EH, so if there is cold code, this - // better not be a funclet! - // TODO-CQ: support funclets in cold code - - noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT); +// We don't support hot/cold splitting with EH, so if there is cold code, this +// better not be a funclet! 
+// TODO-CQ: support funclets in cold code +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL)) + { + noway_assert(isHotCode && (funKind == CORJIT_FUNC_ROOT)); + } + else +#endif // DEBUG + { + noway_assert(isHotCode || (funKind == CORJIT_FUNC_ROOT)); + } // Compute the final size, and start and end offsets of the fragment @@ -1656,7 +1669,17 @@ void UnwindFragmentInfo::Allocate( if (isHotCode) { - assert(endOffset <= uwiComp->info.compTotalHotCodeSize); +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL)) + { + assert(endOffset <= uwiComp->info.compNativeCodeSize); + } + else +#endif // DEBUG + { + assert(endOffset <= uwiComp->info.compTotalHotCodeSize); + } + pColdCode = NULL; } else diff --git a/src/coreclr/jit/unwindx86.cpp b/src/coreclr/jit/unwindx86.cpp index bd27e46cbef49..32d077429af6a 100644 --- a/src/coreclr/jit/unwindx86.cpp +++ b/src/coreclr/jit/unwindx86.cpp @@ -113,18 +113,17 @@ void Compiler::unwindEmit(void* pHotCode, void* pColdCode) // void Compiler::unwindReserveFunc(FuncInfoDsc* func) { -#ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr)) + unwindReserveFuncHelper(func, true); + + if (fgFirstColdBlock != nullptr) { - assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets. - unwindReserveFuncHelper(func, true); - } - else +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting()) + { + assert(func->funKind == FUNC_ROOT); // No splitting of funclets. + } + else #endif // DEBUG - { - unwindReserveFuncHelper(func, true); - - if (fgFirstColdBlock != nullptr) { unwindReserveFuncHelper(func, false); } @@ -164,17 +163,13 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER); static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER); -#ifdef DEBUG - if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr)) + unwindEmitFuncHelper(func, pHotCode, pColdCode, true); + + if (pColdCode != nullptr) { - fakeUnwindEmitFuncHelper(func, pHotCode); - } - else +#ifdef DEBUG + if (!JitConfig.JitFakeProcedureSplitting()) #endif // DEBUG - { - unwindEmitFuncHelper(func, pHotCode, pColdCode, true); - - if (pColdCode != nullptr) { unwindEmitFuncHelper(func, pHotCode, pColdCode, false); } @@ -258,7 +253,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo if (isHotCode) { - assert(endOffset <= info.compTotalHotCodeSize); +#ifdef DEBUG + if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr)) + { + assert(endOffset <= info.compNativeCodeSize); + } + else +#endif // DEBUG + { + assert(endOffset <= info.compTotalHotCodeSize); + } + pColdCode = nullptr; } else @@ -276,22 +281,4 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo (BYTE*)&unwindInfo, (CorJitFuncKind)func->funKind); } -#ifdef DEBUG -void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode) -{ - assert(fgFirstColdBlock != nullptr); - assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets. 
- - const UNATIVE_OFFSET startOffset = 0; - const UNATIVE_OFFSET endOffset = info.compNativeCodeSize; - - UNWIND_INFO unwindInfo; - unwindInfo.FunctionLength = (ULONG)(endOffset); - - // Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section - eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, sizeof(UNWIND_INFO), (BYTE*)&unwindInfo, - (CorJitFuncKind)func->funKind); -} -#endif // DEBUG - #endif // FEATURE_EH_FUNCLETS
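For readers following the eeAllocMem/emitEndCodeGen changes above, here is a small standalone sketch (not part of the patch) of the layout arithmetic the revised eeAllocMem performs when JitFakeProcedureSplitting is enabled on an ARM64-like target: cold code is placed immediately after hot code, and the read-only data section is appended after the code with alignment padding, matching the commit message. The sizes, the alignmentPad helper, and the printed layout are illustrative assumptions, not values taken from the change.

#include <cassert>
#include <cstdio>

// Bytes of padding needed to round 'size' up to 'alignment' (mirrors the JIT's AlignmentPad).
static unsigned alignmentPad(unsigned size, unsigned alignment)
{
    return (alignment - (size % alignment)) % alignment;
}

int main()
{
    // Hypothetical allocation request.
    unsigned hotCodeSize     = 0x123; // bytes of hot code
    unsigned coldCodeSize    = 0x45;  // bytes of cold code
    unsigned roDataSize      = 0x20;  // bytes of read-only data (float constants, jump tables)
    unsigned roDataAlignment = 16;    // stands in for the emitConsDsc.alignment passed by the emitter

    // Fake splitting: request one contiguous block and place cold code right after hot code.
    unsigned coldCodeOffset = hotCodeSize;
    hotCodeSize += coldCodeSize;
    coldCodeSize = 0;

    // ARM64/LoongArch64: append the read-only data section after the code, padded to its
    // required alignment, so the data stays adjacent to the code that references it.
    unsigned roDataAlignmentDelta = (roDataSize > 0) ? alignmentPad(hotCodeSize, roDataAlignment) : 0;
    unsigned roDataOffset         = hotCodeSize + roDataAlignmentDelta;
    unsigned totalAllocation      = roDataOffset + roDataSize;

    assert(roDataOffset % roDataAlignment == 0);
    printf("cold code at +0x%x, ro data at +0x%x, single allocation of 0x%x bytes\n",
           coldCodeOffset, roDataOffset, totalAllocation);
    return 0;
}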
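Similarly, a minimal sketch of the long unconditional branch path added to emitOutputLJ: when the jump stays within one section, the byte distance is checked for 4-byte alignment, scaled by 4, range-checked against a signed 26-bit immediate, and OR'd into the instruction word; when it crosses the hot/cold boundary, the offset field is left zero and an IMAGE_REL_ARM64_BRANCH26 relocation supplies the target. The standalone form and helper name are assumptions for illustration; 0x14000000 is the architectural base encoding of the ARM64 B instruction.

#include <cassert>
#include <cstdint>

// Encode "B <label>" whose target is 'distVal' bytes away, or leave the offset
// field zero when a cross-section relocation will fill it in later.
static uint32_t encodeUncondBranch(int64_t distVal, bool crossesHotColdBoundary)
{
    uint32_t code = 0x14000000; // base encoding of B, imm26 field zero

    if (!crossesHotColdBoundary)
    {
        assert((distVal & 3) == 0);   // branch offsets are 4-byte aligned
        int64_t imm26 = distVal >> 2; // branch offset encodings are scaled by 4
        assert((imm26 >= -(int64_t(1) << 25)) && (imm26 < (int64_t(1) << 25))); // simm26 range
        code |= (uint32_t)(imm26 & 0x3FFFFFF); // insert offset into the instruction
    }
    // else: emit with a zero offset and record IMAGE_REL_ARM64_BRANCH26 for the target.

    return code;
}

int main()
{
    // "b #-8" (branch back two instructions) encodes as 0x17FFFFFE.
    return (encodeUncondBranch(-8, false) == 0x17FFFFFE) ? 0 : 1;
}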