Skip to content

Commit

Permalink
cranelift: Round inline stack probes down, not up
Browse files Browse the repository at this point in the history
When we have `enable_probestack` turned on and set `probestack_strategy`
to "inline", we have to compute how many pages of the stack we'll probe.

The current implementation rounds our stack frame size up to the nearest
multiple of the page size, then probes each page once.

However, if our stack frame is not a multiple of the page size, that
means there's a partial page at the end. It's not necessary to probe
that partial page, just like it's unnecessary to probe at all if the
frame is smaller than one page. Either way, any signal handler needs to
be prepared for stack accesses on that last page to fault at any time
during the function's execution.
  • Loading branch information
jameysharp authored and sunfishcode committed Dec 12, 2024
1 parent 9aa048b commit 17b73b7
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 20 deletions.
6 changes: 4 additions & 2 deletions cranelift/codegen/src/isa/aarch64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -705,8 +705,10 @@ impl ABIMachineSpec for AArch64MachineDeps {
// Set this to 3 to keep the max size of the probe to 6 instructions.
const PROBE_MAX_UNROLL: u32 = 3;

let probe_count = align_to(frame_size, guard_size) / guard_size;
if probe_count <= PROBE_MAX_UNROLL {
let probe_count = frame_size / guard_size;
if probe_count == 0 {
// No probe necessary
} else if probe_count <= PROBE_MAX_UNROLL {
Self::gen_probestack_unroll(insts, guard_size, probe_count)
} else {
Self::gen_probestack_loop(insts, frame_size, guard_size)
Expand Down
6 changes: 5 additions & 1 deletion cranelift/codegen/src/isa/riscv64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -698,7 +698,11 @@ impl ABIMachineSpec for Riscv64MachineDeps {
// Unroll at most n consecutive probes, before falling back to using a loop
const PROBE_MAX_UNROLL: u32 = 3;
// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;
let probe_count = frame_size / guard_size;
if probe_count == 0 {
// No probe necessary
return;
}

// Must be a caller-saved register that is not an argument.
let tmp = Writable::from_reg(x_reg(28)); // t3
Expand Down
7 changes: 4 additions & 3 deletions cranelift/codegen/src/isa/x64/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -628,9 +628,10 @@ impl ABIMachineSpec for X64ABIMachineSpec {
const PROBE_MAX_UNROLL: u32 = 4;

// Number of probes that we need to perform
let probe_count = align_to(frame_size, guard_size) / guard_size;

if probe_count <= PROBE_MAX_UNROLL {
let probe_count = frame_size / guard_size;
if probe_count == 0 {
// No probe necessary
} else if probe_count <= PROBE_MAX_UNROLL {
Self::gen_probestack_unroll(insts, guard_size, probe_count)
} else {
Self::gen_probestack_loop(insts, call_conv, frame_size, guard_size)
Expand Down
20 changes: 10 additions & 10 deletions cranelift/codegen/src/machinst/abi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1809,16 +1809,16 @@ impl<M: ABIMachineSpec> Callee<M> {

if self.flags.enable_probestack() {
let guard_size = 1 << self.flags.probestack_size_log2();
if total_stacksize >= guard_size {
match self.flags.probestack_strategy() {
ProbestackStrategy::Inline => M::gen_inline_probestack(
&mut insts,
self.call_conv,
total_stacksize,
guard_size,
),
ProbestackStrategy::Outline => {
M::gen_probestack(&mut insts, total_stacksize)
match self.flags.probestack_strategy() {
ProbestackStrategy::Inline => M::gen_inline_probestack(
&mut insts,
self.call_conv,
total_stacksize,
guard_size,
),
ProbestackStrategy::Outline => {
if total_stacksize >= guard_size {
M::gen_probestack(&mut insts, total_stacksize);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ block0:
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; inline_stack_probe##guard_size=4096 probe_count=25 tmp=t3
; inline_stack_probe##guard_size=4096 probe_count=24 tmp=t3
; lui t6,-24
; addi t6,t6,-1696
; add sp,sp,t6
Expand All @@ -140,7 +140,7 @@ block0:
; c.sdsp ra, 8(sp)
; c.sdsp s0, 0(sp)
; c.mv s0, sp
; c.lui t6, 0x19
; c.lui t6, 0x18
; c.lui t3, 1
; bgeu t3, t6, 0x12
; sub t5, sp, t6
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ block0:
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; inline_stack_probe##guard_size=4096 probe_count=25 tmp=t3
; inline_stack_probe##guard_size=4096 probe_count=24 tmp=t3
; lui t6,-24
; addi t6,t6,-1696
; add sp,sp,t6
Expand All @@ -140,7 +140,7 @@ block0:
; sd ra, 8(sp)
; sd s0, 0(sp)
; mv s0, sp
; lui t6, 0x19
; lui t6, 0x18
; lui t3, 1
; bgeu t3, t6, 0x14
; sub t5, sp, t6
Expand Down

0 comments on commit 17b73b7

Please sign in to comment.