Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Implement stack probing using helpers #27184

Merged
merged 15 commits into from
Oct 29, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/inc/jithelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,11 @@

JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB)

#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
#ifndef _TARGET_ARM64_
JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY)
#else // !_TARGET_X86_ && !_TARGET_AMD64_
#else
JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
#endif // !_TARGET_X86_ && !_TARGET_AMD64_
#endif

#undef JITHELPER
#undef DYNAMICJITHELPER
Expand Down
84 changes: 84 additions & 0 deletions src/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1764,4 +1764,88 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)

#endif // PROFILING_SUPPORTED

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP.
//
// Notes:
// The first instruction of the prolog is always a push (which touches the lowest address
// of the stack), either of the LR register or of some argument registers, e.g., in the case of
// pre-spilling. The LR register is always pushed because we require it to allow for GC return
// address hijacking (see the comment in CodeGen::genPushCalleeSavedRegisters()). These pushes
// happen immediately before calling this function, so the SP at the current location has already
// been touched.
//
echesakov marked this conversation as resolved.
Show resolved Hide resolved
// Arguments:
// frameSize - the size of the stack frame being allocated.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

if (frameSize == 0)
{
return;
}

const target_size_t pageSize = compiler->eeGetPageSize();

assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

if (frameSize < pageSize)
{
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize);
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr initReg, [SP + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
}

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, initReg);
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize,
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);

if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
{
*pInitRegZeroed = false;
}
}

compiler->unwindAllocStack(frameSize);
#ifdef USING_SCOPE_INFO
if (!doubleAlignOrFramePointerUsed())
{
psiAdjustStackLevel(frameSize);
}
#endif // USING_SCOPE_INFO
}

#endif // _TARGET_ARM_
139 changes: 139 additions & 0 deletions src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7787,4 +7787,143 @@ void CodeGen::genArm64EmitterUnitTests()
}
#endif // defined(DEBUG)

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack.
//
// Notes:
// This only does the probing; allocating the frame is done when callee-saved registers are saved.
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
// page by default, so we need to be more careful. We do an extra probe if we might not have probed
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
// on Windows as well just to be consistent, even though it should not be necessary.
//
// Arguments:
// frameSize - the size of the stack frame being allocated.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

if (frameSize == 0)
{
return;
}

const target_size_t pageSize = compiler->eeGetPageSize();

// What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
// if the next action on the stack might subtract from SP first, before touching the current SP, then
// we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
// a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
// but we don't alter SP.
target_size_t lastTouchDelta = 0;

assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

if (frameSize < pageSize)
{
lastTouchDelta = frameSize;
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
lastTouchDelta = frameSize;

for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr wzr, [sp + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

lastTouchDelta -= pageSize;
}

assert(lastTouchDelta == frameSize % pageSize);
compiler->unwindPadding();
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

// Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
// until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
// the stack afterward (which means the stack pointer needs to be known).

regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

regNumber rOffset = initReg;
regNumber rLimit;
regMaskTP tempMask;

// We pick the next lowest register number for rLimit
noway_assert(availMask != RBM_NONE);
tempMask = genFindLowestBit(availMask);
rLimit = genRegNumFromMask(tempMask);

// Generate:
//
// mov rOffset, -pageSize // On arm, this turns out to be "movw r1, 0xf000; sxth r1, r1".
// // We could save 4 bytes in the prolog by using "movs r1, 0" at the
// // runtime expense of running a useless first loop iteration.
// mov rLimit, -frameSize
// loop:
// ldr wzr, [sp + rOffset]
// sub rOffset, pageSize
// cmp rLimit, rOffset
// b.ls loop // If rLimit is lower or same, we need to probe this rOffset. Note
// // especially that if it is the same, we haven't probed this page.
echesakov marked this conversation as resolved.
Show resolved Hide resolved

noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int

instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);

//
// Can't have a label inside the ReJIT padding area
//
genPrologPadForReJit();

// There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
// `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.

GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
GetEmitter()->emitIns_J(INS_bls, NULL, -4);

*pInitRegZeroed = false; // The initReg does not contain zero

compiler->unwindPadding();

lastTouchDelta = frameSize % pageSize;
}

if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
{
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
compiler->unwindPadding();

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero
}
}

#endif // _TARGET_ARM64_
Loading