Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable fake hot/cold splitting on ARM64 #70708

Merged
merged 13 commits into from
Jun 22, 2022
6 changes: 3 additions & 3 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3199,10 +3199,10 @@ void Compiler::compInitOptions(JitFlags* jitFlags)

opts.compProcedureSplitting = jitFlags->IsSet(JitFlags::JIT_FLAG_PROCSPLIT) || enableFakeSplitting;

#ifdef TARGET_ARM64
// TODO-ARM64-NYI: enable hot/cold splitting
#ifdef TARGET_LOONGARCH64
// Hot/cold splitting is not being tested on LoongArch64.
opts.compProcedureSplitting = false;
#endif // TARGET_ARM64
#endif // TARGET_LOONGARCH64

#ifdef DEBUG
opts.compProcedureSplittingEH = opts.compProcedureSplitting;
Expand Down
6 changes: 1 addition & 5 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -7665,7 +7665,7 @@ class Compiler

// ICorJitInfo wrappers

void eeAllocMem(AllocMemArgs* args);
void eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment);

void eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize);

Expand Down Expand Up @@ -8022,10 +8022,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);

#ifdef DEBUG
void fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode);
#endif // DEBUG

#endif // TARGET_AMD64 || (TARGET_X86 && FEATURE_EH_FUNCLETS)

UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
Expand Down
62 changes: 46 additions & 16 deletions src/coreclr/jit/ee_il_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1122,34 +1122,64 @@ void Compiler::eeDispLineInfos()
* (e.g., host AMD64, target ARM64), then VM will get confused anyway.
*/

void Compiler::eeAllocMem(AllocMemArgs* args)
void Compiler::eeAllocMem(AllocMemArgs* args, const UNATIVE_OFFSET roDataSectionAlignment)
{
#ifdef DEBUG
const UNATIVE_OFFSET hotSizeRequest = args->hotCodeSize;
const UNATIVE_OFFSET coldSizeRequest = args->coldCodeSize;

// Fake splitting implementation: place hot/cold code in contiguous section
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
// Fake splitting implementation: place hot/cold code in contiguous section.
UNATIVE_OFFSET coldCodeOffset = 0;
if (JitConfig.JitFakeProcedureSplitting() && (args->coldCodeSize > 0))
{
args->hotCodeSize = hotSizeRequest + coldSizeRequest;
coldCodeOffset = args->hotCodeSize;
assert(coldCodeOffset > 0);
args->hotCodeSize += args->coldCodeSize;
args->coldCodeSize = 0;
}
#endif

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

// For arm64/LoongArch64, we always want to allocate JIT data adjacent to the code, similar to what a native compiler does.
// This allows us to use a single `ldr` to access data such as float constants and jump tables.
// On LoongArch64, `pcaddi + ld` is used to access such data.

UNATIVE_OFFSET roDataAlignmentDelta = 0;
if (args->roDataSize > 0)
{
roDataAlignmentDelta = AlignmentPad(args->hotCodeSize, roDataSectionAlignment);
}

const UNATIVE_OFFSET roDataOffset = args->hotCodeSize + roDataAlignmentDelta;
args->hotCodeSize = roDataOffset + args->roDataSize;
args->roDataSize = 0;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

info.compCompHnd->allocMem(args);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (coldSizeRequest > 0))
{
// Fix up hot/cold code pointers
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + hotSizeRequest;
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + hotSizeRequest;

// Reset args' hot/cold code sizes in case caller reads them later
args->hotCodeSize = hotSizeRequest;
args->coldCodeSize = coldSizeRequest;
if (JitConfig.JitFakeProcedureSplitting() && (coldCodeOffset > 0))
{
// Fix up cold code pointers. Cold section is adjacent to hot section.
assert(args->coldCodeBlock == nullptr);
assert(args->coldCodeBlockRW == nullptr);
args->coldCodeBlock = ((BYTE*)args->hotCodeBlock) + coldCodeOffset;
args->coldCodeBlockRW = ((BYTE*)args->hotCodeBlockRW) + coldCodeOffset;
}
#endif

#endif // DEBUG

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)

// Fix up data section pointers.
assert(args->roDataBlock == nullptr);
assert(args->roDataBlockRW == nullptr);
args->roDataBlock = ((BYTE*)args->hotCodeBlock) + roDataOffset;
args->roDataBlockRW = ((BYTE*)args->hotCodeBlockRW) + roDataOffset;

#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
}

void Compiler::eeReserveUnwindInfo(bool isFunclet, bool isColdCode, ULONG unwindSize)
Expand Down
39 changes: 1 addition & 38 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4557,7 +4557,6 @@ void emitter::emitJumpDistBind()
else if (emitIsUncondJump(jmp))
{
// Nothing to do; we don't shrink these.
assert(jmp->idjShort);
ssz = JMP_SIZE_SMALL;
}
else if (emitIsLoadLabel(jmp))
Expand Down Expand Up @@ -6346,47 +6345,13 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
AllocMemArgs args;
memset(&args, 0, sizeof(args));

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
// For arm64/LoongArch64, we always want to allocate JIT data adjacent to the code, similar to what a native compiler does.
// This allows us to use a single `ldr` to access data such as float constants and jump tables.
// On LoongArch64, `pcaddi + ld` is used to access such data.
if (emitTotalColdCodeSize > 0)
{
// JIT data might be far away from the cold code.
NYI("Need to handle fix-up to data from cold code.");
}

UNATIVE_OFFSET roDataAlignmentDelta = 0;
if (emitConsDsc.dsdOffs > 0)
{
roDataAlignmentDelta = AlignmentPad(emitTotalHotCodeSize, dataAlignment);
}

args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs;
args.coldCodeSize = emitTotalColdCodeSize;
args.roDataSize = 0;
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitComp->eeAllocMem(&args);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
coldCodeBlock = (BYTE*)args.coldCodeBlock;
coldCodeBlockRW = (BYTE*)args.coldCodeBlockRW;

consBlock = codeBlock + emitTotalHotCodeSize + roDataAlignmentDelta;
consBlockRW = codeBlockRW + emitTotalHotCodeSize + roDataAlignmentDelta;

#else

args.hotCodeSize = emitTotalHotCodeSize;
args.coldCodeSize = emitTotalColdCodeSize;
args.roDataSize = emitConsDsc.dsdOffs;
args.xcptnsCount = xcptnsCount;
args.flag = allocMemFlag;

emitComp->eeAllocMem(&args);
emitComp->eeAllocMem(&args, emitConsDsc.alignment);

codeBlock = (BYTE*)args.hotCodeBlock;
codeBlockRW = (BYTE*)args.hotCodeBlockRW;
Expand All @@ -6395,8 +6360,6 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
consBlock = (BYTE*)args.roDataBlock;
consBlockRW = (BYTE*)args.roDataBlockRW;

#endif

#ifdef DEBUG
if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN) != 0)
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/emit.h
Original file line number Diff line number Diff line change
Expand Up @@ -997,7 +997,7 @@ class emitter
case IF_LARGELDC:
if (isVectorRegister(idReg1()))
{
// adrp + ldr + fmov
// (adrp + ldr + fmov) or (adrp + add + ld1)
size = 12;
}
else
Expand Down
Loading