Skip to content

Commit

Permalink
Implement stack probing using helpers on win-arm and linux-arm (dotne…
Browse files Browse the repository at this point in the history
  • Loading branch information
echesakov committed Oct 29, 2019
1 parent 857797d commit eae780c
Show file tree
Hide file tree
Showing 11 changed files with 321 additions and 226 deletions.
6 changes: 3 additions & 3 deletions src/inc/jithelpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -368,11 +368,11 @@

JITHELPER(CORINFO_HELP_GVMLOOKUP_FOR_SLOT, NULL, CORINFO_HELP_SIG_NO_ALIGN_STUB)

// NOTE(review): the directives below do not balance as written (two #else arms follow the
// inner #ifndef). This looks like the removed and added lines of a diff shown together with
// their +/- markers lost: one variant is guarded by _TARGET_X86_ / _TARGET_AMD64_, the other
// by !_TARGET_ARM64_. Confirm against the actual header before relying on this text.
#if defined(_TARGET_X86_) || defined(_TARGET_AMD64_)
#ifndef _TARGET_ARM64_
// Guarded arm: the stack probe helper slot is bound to JIT_StackProbe (CORINFO_HELP_SIG_REG_ONLY).
JITHELPER(CORINFO_HELP_STACK_PROBE, JIT_StackProbe, CORINFO_HELP_SIG_REG_ONLY)
#else // !_TARGET_X86_ && !_TARGET_AMD64_
#else
// Fallback arm: the helper slot is still declared, but with no implementation
// (NULL entry point, CORINFO_HELP_SIG_UNDEF).
JITHELPER(CORINFO_HELP_STACK_PROBE, NULL, CORINFO_HELP_SIG_UNDEF)
#endif // !_TARGET_X86_ && !_TARGET_AMD64_
#endif

#undef JITHELPER
#undef DYNAMICJITHELPER
Expand Down
84 changes: 84 additions & 0 deletions src/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1764,4 +1764,88 @@ void CodeGen::genProfilingLeaveCallback(unsigned helper)

#endif // PROFILING_SUPPORTED

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack and allocate the local stack frame - subtract from SP.
//
// Notes:
// The first instruction of the prolog is always a push (which touches the lowest address
// of the stack), either of the LR register or of some argument registers, e.g., in the case of
// pre-spilling. The LR register is always pushed because we require it to allow for GC return
// address hijacking (see the comment in CodeGen::genPushCalleeSavedRegisters()). These pushes
// happen immediately before calling this function, so the SP at the current location has already
// been touched.
//
// Arguments:
// frameSize - the size of the stack frame being allocated.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

if (frameSize == 0)
{
return;
}

const target_size_t pageSize = compiler->eeGetPageSize();

assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

if (frameSize < pageSize)
{
GetEmitter()->emitIns_R_I(INS_sub, EA_PTRSIZE, REG_SPBASE, frameSize);
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr initReg, [SP + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
}

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, frameSize);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, initReg);
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

genInstrWithConstant(INS_sub, EA_PTRSIZE, REG_STACK_PROBE_HELPER_ARG, REG_SPBASE, frameSize,
INS_FLAGS_DONT_CARE, REG_STACK_PROBE_HELPER_ARG);
regSet.verifyRegUsed(REG_STACK_PROBE_HELPER_ARG);
genEmitHelperCall(CORINFO_HELP_STACK_PROBE, 0, EA_UNKNOWN, REG_STACK_PROBE_HELPER_CALL_TARGET);
compiler->unwindPadding();
GetEmitter()->emitIns_R_R(INS_mov, EA_PTRSIZE, REG_SPBASE, REG_STACK_PROBE_HELPER_ARG);

if ((genRegMask(initReg) & (RBM_STACK_PROBE_HELPER_ARG | RBM_STACK_PROBE_HELPER_CALL_TARGET |
RBM_STACK_PROBE_HELPER_TRASH)) != RBM_NONE)
{
*pInitRegZeroed = false;
}
}

compiler->unwindAllocStack(frameSize);
#ifdef USING_SCOPE_INFO
if (!doubleAlignOrFramePointerUsed())
{
psiAdjustStackLevel(frameSize);
}
#endif // USING_SCOPE_INFO
}

#endif // _TARGET_ARM_
139 changes: 139 additions & 0 deletions src/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7787,4 +7787,143 @@ void CodeGen::genArm64EmitterUnitTests()
}
#endif // defined(DEBUG)

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack.
//
// Notes:
// This only does the probing; allocating the frame is done when callee-saved registers are saved.
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
// page by default, so we need to be more careful. We do an extra probe if we might not have probed
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
// on Windows as well just to be consistent, even though it should not be necessary.
//
// Three code shapes are emitted, selected by frameSize:
// - frameSize < pageSize: no probe is emitted by the branch itself;
// - frameSize < compiler->getVeryLargeFrameSize(): one load per page, fully unrolled;
// - otherwise: a small loop that loads from [sp + rOffset] while stepping rOffset down a page at a time.
// In every case a final check may add one more load at [sp - frameSize].
//
// Arguments:
// frameSize - the size of the stack frame being allocated. Nothing is emitted when it is zero.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value.
// maskArgRegsLiveIn - incoming argument registers that are currently live. Only consulted in the
// very-large-frame path, where these registers are excluded when picking the
// temporary limit register.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

// An empty frame needs no probing at all.
if (frameSize == 0)
{
return;
}

const target_size_t pageSize = compiler->eeGetPageSize();

// What offset from the final SP was the last probe? If we haven't probed almost a complete page, and
// if the next action on the stack might subtract from SP first, before touching the current SP, then
// we do one more probe at the very bottom. This can happen if we call a function on arm64 that does
// a "STP fp, lr, [sp-504]!", that is, pre-decrement SP then store. Note that we probe here for arm64,
// but we don't alter SP.
target_size_t lastTouchDelta = 0;

assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

if (frameSize < pageSize)
{
// Sub-page frame: nothing is probed here, so the whole frame lies below the last touched address.
lastTouchDelta = frameSize;
}
else if (frameSize < compiler->getVeryLargeFrameSize())
{
// Start from the full frame size and subtract one page for every probe emitted below.
lastTouchDelta = frameSize;

for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
// Generate:
// movw initReg, -probeOffset
// ldr wzr, [sp + initReg]

instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)probeOffset);
// The loaded value is discarded (destination is the zero register); only the memory access matters.
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero

lastTouchDelta -= pageSize;
}

// One page was subtracted per probe, so what remains is the part of the frame below the last probe.
assert(lastTouchDelta == frameSize % pageSize);
compiler->unwindPadding();
}
else
{
assert(frameSize >= compiler->getVeryLargeFrameSize());

// Emit the following sequence to 'tickle' the pages. Note it is important that stack pointer not change
// until this is complete since the tickles could cause a stack overflow, and we need to be able to crawl
// the stack afterward (which means the stack pointer needs to be known).

// Candidate registers for the loop limit: integer registers that are either already recorded as
// modified or are not callee-saved, minus the live incoming argument registers and initReg itself.
regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers as they are currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

// initReg doubles as the running (negative) offset register; rLimit is chosen below.
regNumber rOffset = initReg;
regNumber rLimit;
regMaskTP tempMask;

// We pick the next lowest register number for rLimit
noway_assert(availMask != RBM_NONE);
tempMask = genFindLowestBit(availMask);
rLimit = genRegNumFromMask(tempMask);

// Generate:
//
// mov rOffset, -pageSize // On arm, this turns out to be "movw r1, 0xf000; sxth r1, r1".
// // We could save 4 bytes in the prolog by using "movs r1, 0" at the
// // runtime expense of running a useless first loop iteration.
// mov rLimit, -frameSize
// loop:
// ldr wzr, [sp + rOffset]
// sub rOffset, pageSize
// cmp rLimit, rOffset
// b.ls loop // If rLimit is lower or same, we need to probe this rOffset. Note
// // especially that if it is the same, we haven't probed this page.
//
// NOTE(review): the "On arm ..." remark above names 32-bit ARM instructions and registers and looks
// carried over from the ARM32 code generator - confirm whether it applies to this target.

noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); // make sure framesize safely fits within an int

instGen_Set_Reg_To_Imm(EA_PTRSIZE, rOffset, -(ssize_t)pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, rLimit, -(ssize_t)frameSize);

//
// Can't have a label inside the ReJIT padding area
//
genPrologPadForReJit();

// There's a "virtual" label here. But we can't create a label in the prolog, so we use the magic
// `emitIns_J` with a negative `instrCount` to branch back a specific number of instructions.
//
// NOTE(review): the -4 below is tied to the exact number of instructions emitted for the loop body
// (ldr, sub, cmp, branch); presumably it must be revisited if any of them is added, removed, or
// expands to more than one instruction.

GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, rOffset);
GetEmitter()->emitIns_R_R_I(INS_sub, EA_PTRSIZE, rOffset, rOffset, pageSize);
GetEmitter()->emitIns_R_R(INS_cmp, EA_PTRSIZE, rLimit, rOffset); // If equal, we need to probe again
GetEmitter()->emitIns_J(INS_bls, NULL, -4);

// rOffset is initReg, and it was loaded with a non-zero offset above.
*pInitRegZeroed = false; // The initReg does not contain zero

compiler->unwindPadding();

// Same bookkeeping as the unrolled path: the remainder of the frame below the last page boundary.
lastTouchDelta = frameSize % pageSize;
}

// If the untouched tail of the frame plus the boundary threshold spans more than a page, emit one
// extra probe at the very bottom of the frame, [sp - frameSize].
if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
{
// Presumably guards the assumption that a single additional probe is sufficient.
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < 2 * pageSize);
instGen_Set_Reg_To_Imm(EA_PTRSIZE, initReg, -(ssize_t)frameSize);
GetEmitter()->emitIns_R_R_R(INS_ldr, EA_4BYTE, REG_ZR, REG_SPBASE, initReg);
compiler->unwindPadding();

regSet.verifyRegUsed(initReg);
*pInitRegZeroed = false; // The initReg does not contain zero
}
}

#endif // _TARGET_ARM64_
Loading

0 comments on commit eae780c

Please sign in to comment.