From 63c77456181c2ecf6faa6bfe500f2564cc748d3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Sat, 28 Oct 2017 16:19:07 +0200 Subject: [PATCH 01/17] Use fibers on Windows. Use CFI instructions on other platforms to enable unwinding, catching panics and backtraces. --- Cargo.toml | 4 + README.md | 2 +- build.rs | 21 ++-- src/arch/asm.h | 2 +- src/arch/i686.S | 19 ++-- src/arch/i686.asm | 33 ------ src/arch/windows.c | 27 +++++ src/arch/x86_64.S | 30 ++---- src/arch/x86_64.asm | 23 ---- src/lib.rs | 248 +++++++++++++++++++++++++++++--------------- tests/smoke.rs | 38 ++++++- 11 files changed, 262 insertions(+), 185 deletions(-) delete mode 100644 src/arch/i686.asm create mode 100644 src/arch/windows.c delete mode 100644 src/arch/x86_64.asm diff --git a/Cargo.toml b/Cargo.toml index ec19b84..921e3f4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,5 +22,9 @@ test = false cfg-if = "0.1" libc = "0.2" +[target.'cfg(windows)'.dependencies] +kernel32-sys = "0.2.2" +winapi = "0.2.8" + [build-dependencies] cc = "1.0" diff --git a/README.md b/README.md index c5ac5e8..ad568e7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A stack-growth library for Rust. Enables annotating fixed points in programs where the stack may want to grow larger. Spills over to the heap if the stack -has it its limit. +has hit its limit. This library is intended on helping implement recursive algorithms. diff --git a/build.rs b/build.rs index 89ca9f7..3f23d29 100644 --- a/build.rs +++ b/build.rs @@ -4,7 +4,6 @@ use std::env; fn main() { let target = env::var("TARGET").unwrap(); - let msvc = target.contains("msvc"); let mut cfg = cc::Build::new(); @@ -19,15 +18,19 @@ fn main() { stacker: {}\n\n", target); } - if target.starts_with("x86_64") { - cfg.file(if msvc {"src/arch/x86_64.asm"} else {"src/arch/x86_64.S"}); - cfg.define("X86_64", None); - } else if target.contains("i686") { - cfg.file(if msvc {"src/arch/i686.asm"} else {"src/arch/i686.S"}); - cfg.define("X86", None); + if target.contains("windows") { + cfg.file("src/arch/windows.c"); } else { - panic!("\n\nusing currently unsupported target triple with \ - stacker: {}\n\n", target); + if target.starts_with("x86_64") { + cfg.file("src/arch/x86_64.S"); + cfg.define("X86_64", None); + } else if target.contains("i686") { + cfg.file("src/arch/i686.S"); + cfg.define("X86", None); + } else { + panic!("\n\nusing currently unsupported target triple with \ + stacker: {}\n\n", target); + } } cfg.include("src/arch").compile("libstacker.a"); diff --git a/src/arch/asm.h b/src/arch/asm.h index 56c9d28..e53773e 100644 --- a/src/arch/asm.h +++ b/src/arch/asm.h @@ -1,4 +1,4 @@ -#if defined(APPLE) || (defined(WINDOWS) && defined(X86)) +#if defined(APPLE) #define GLOBAL(name) .globl _ ## name; _ ## name #else #define GLOBAL(name) .globl name; name diff --git a/src/arch/i686.S b/src/arch/i686.S index dd143c5..da1ff88 100644 --- a/src/arch/i686.S +++ b/src/arch/i686.S @@ -5,23 +5,18 @@ GLOBAL(__stacker_black_box): ret -GLOBAL(__stacker_stack_pointer): - mov %esp, %eax - ret - -#if defined(WINDOWS) -GLOBAL(__stacker_get_tib_32): - mov %fs:0x18, %eax - ret -#endif - GLOBAL(__stacker_switch_stacks): + .cfi_startproc push %ebp + .cfi_def_cfa_offset 8 + .cfi_offset ebp, -8 mov %esp, %ebp - mov 8(%ebp), %esp // switch to our new stack + .cfi_def_cfa_register ebp + mov 16(%ebp), %esp // switch to our new stack mov 12(%ebp), %eax // load function we're going to call - push 16(%ebp) // push argument to first function + push 8(%ebp) // push argument to first function 
call *%eax // call our function pointer mov %ebp, %esp // restore the old stack pointer pop %ebp ret + .cfi_endproc diff --git a/src/arch/i686.asm b/src/arch/i686.asm deleted file mode 100644 index 0bb1333..0000000 --- a/src/arch/i686.asm +++ /dev/null @@ -1,33 +0,0 @@ -.586 -.MODEL FLAT, C -.CODE - -__stacker_black_box PROC - RET -__stacker_black_box ENDP - -__stacker_stack_pointer PROC - MOV EAX, ESP - RET -__stacker_stack_pointer ENDP - -__stacker_get_tib_32 PROC - ASSUME FS:NOTHING - MOV EAX, FS:[24] - ASSUME FS:ERROR - RET -__stacker_get_tib_32 ENDP - -__stacker_switch_stacks PROC - PUSH EBP - MOV EBP, ESP - MOV ESP, [EBP + 8] ; switch stacks - MOV EAX, [EBP + 12] ; load the function we're going to call - PUSH [EBP + 16] ; push the argument to this function - CALL EAX ; call the next function - MOV ESP, EBP ; restore the old stack pointer - POP EBP - RET -__stacker_switch_stacks ENDP - -END diff --git a/src/arch/windows.c b/src/arch/windows.c new file mode 100644 index 0000000..da1b14a --- /dev/null +++ b/src/arch/windows.c @@ -0,0 +1,27 @@ +#include + +void __stacker_black_box() {} + +PVOID __stacker_get_current_fiber() { + return GetCurrentFiber(); +} + +static size_t calc_stack_limit(size_t stack_low, size_t stack_guarantee) { + return stack_low + + max(stack_guarantee, sizeof(void *) == 4 ? 0x1000 : 0x2000) + // The guaranteed pages on a stack overflow + 0x1000; // The guard page +} + +#if defined(_M_X64) +size_t __stacker_get_stack_limit() { + return calc_stack_limit(__readgsqword(0x1478), // The base address of the stack. Referenced in GetCurrentThreadStackLimits + __readgsqword(0x1748)); // The guaranteed pages on a stack overflow. Referenced in SetThreadStackGuarantee +} +#endif + +#ifdef _M_IX86 +size_t __stacker_get_stack_limit() { + return calc_stack_limit(__readfsdword(0xE0C), // The base address of the stack. Referenced in GetCurrentThreadStackLimits + __readfsdword(0xF78)); // The guaranteed pages on a stack overflow. 
Referenced in SetThreadStackGuarantee +} +#endif \ No newline at end of file diff --git a/src/arch/x86_64.S b/src/arch/x86_64.S index cbdf016..a82d73e 100644 --- a/src/arch/x86_64.S +++ b/src/arch/x86_64.S @@ -5,32 +5,16 @@ GLOBAL(__stacker_black_box): ret -GLOBAL(__stacker_stack_pointer): - movq %rsp, %rax - ret - -#if defined(WINDOWS) -#define ARG1 %rcx -#define ARG2 %rdx -#define ARG3 %r8 -#else -#define ARG1 %rdi -#define ARG2 %rsi -#define ARG3 %rdx -#endif - -#if defined(WINDOWS) -GLOBAL(__stacker_get_tib_64): - mov %gs:0x30, %rax - ret -#endif - GLOBAL(__stacker_switch_stacks): + .cfi_startproc push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset rbp, -16 mov %rsp, %rbp - mov ARG1, %rsp // switch to our new stack - mov ARG3, ARG1 // move the data pointer to the first argument - call *ARG2 // call our function pointer + .cfi_def_cfa_register rbp + mov %rdx, %rsp // switch to our new stack + call *%rsi // call our function pointer, data argument in %rdi mov %rbp, %rsp // restore the old stack pointer pop %rbp ret + .cfi_endproc diff --git a/src/arch/x86_64.asm b/src/arch/x86_64.asm deleted file mode 100644 index c14696c..0000000 --- a/src/arch/x86_64.asm +++ /dev/null @@ -1,23 +0,0 @@ -_text SEGMENT - -__stacker_black_box PROC - RET -__stacker_black_box ENDP - -__stacker_stack_pointer PROC - MOV RAX, RSP - RET -__stacker_stack_pointer ENDP - -__stacker_switch_stacks PROC - PUSH RBP - MOV RBP, RSP - MOV RSP, RCX ; switch to our new stack - MOV RCX, R8 ; move the data pointer to the first argument - CALL RDX ; call our function pointer - MOV RSP, RBP ; restore the old stack pointer - POP RBP - RET -__stacker_switch_stacks ENDP - -END diff --git a/src/lib.rs b/src/lib.rs index 9420627..d242a0c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -27,29 +27,10 @@ #[macro_use] extern crate cfg_if; extern crate libc; - -use std::cell::Cell; - -extern { - fn __stacker_stack_pointer() -> usize; - fn __stacker_switch_stacks(new_stack: usize, - fnptr: *const u8, - dataptr: *mut u8); -} - -thread_local! { - static STACK_LIMIT: Cell = Cell::new(unsafe { - guess_os_stack_limit() - }) -} - -fn get_stack_limit() -> usize { - STACK_LIMIT.with(|s| s.get()) -} - -fn set_stack_limit(l: usize) { - STACK_LIMIT.with(|s| s.set(l)) -} +#[cfg(windows)] +extern crate kernel32; +#[cfg(windows)] +extern crate winapi; /// Grows the call stack if necessary. /// @@ -60,13 +41,14 @@ fn set_stack_limit(l: usize) { /// /// The closure `f` is guaranteed to run on a stack with at least `red_zone` /// bytes, and it will be run on the current stack if there's space available. +#[inline(always)] pub fn maybe_grow R>(red_zone: usize, stack_size: usize, f: F) -> R { if remaining_stack() >= red_zone { f() } else { - grow_the_stack(stack_size, f) + grow(stack_size, f) } } @@ -74,84 +56,186 @@ pub fn maybe_grow R>(red_zone: usize, /// /// This function will return the amount of stack space left which will be used /// to determine whether a stack switch should be made or not. +#[inline(always)] pub fn remaining_stack() -> usize { - unsafe { - __stacker_stack_pointer() - get_stack_limit() - } + &mut () as *mut _ as usize - get_stack_limit() } +/// Always creates a new stack for the passed closure to run on. +/// The closure will still be on the same thread as the caller of `grow`. +/// This will allocate a new stack with at least `stack_size` bytes. 
#[inline(never)] -fn grow_the_stack R>(stack_size: usize, f: F) -> R { +pub fn grow R>(stack_size: usize, f: F) -> R { let mut f = Some(f); let mut ret = None; - unsafe { - _grow_the_stack(stack_size, &mut || { - ret = Some(f.take().unwrap()()); - }); - } + _grow(stack_size, &mut || { + ret = Some(f.take().unwrap()()); + }); ret.unwrap() } -unsafe fn _grow_the_stack(stack_size: usize, mut f: &mut FnMut()) { - // Align to 16-bytes (see below for why) - let stack_size = (stack_size + 15) / 16 * 16; +cfg_if! { + if #[cfg(not(windows))] { + use std::cell::Cell; - // Allocate some new stack for oureslves - let mut stack = Vec::::with_capacity(stack_size); - let new_limit = stack.as_ptr() as usize + 32 * 1024; + extern { + fn __stacker_switch_stacks(dataptr: *mut u8, + fnptr: *const u8, + new_stack: usize); + } - // Save off the old stack limits - let old_limit = get_stack_limit(); + thread_local! { + static STACK_LIMIT: Cell = Cell::new(unsafe { + guess_os_stack_limit() + }) + } - // Prepare stack limits for the stack switch - set_stack_limit(new_limit); + #[inline(always)] + fn get_stack_limit() -> usize { + STACK_LIMIT.with(|s| s.get()) + } - // Make sure the stack is 16-byte aligned which should be enough for all - // platforms right now. Allocations on 64-bit are already 16-byte aligned - // and our switching routine doesn't push any other data, but the routine on - // 32-bit pushes an argument so we need a bit of an offset to get it 16-byte - // aligned when the call is made. - let offset = if cfg!(target_pointer_width = "32") { - 12 - } else { - 0 - }; - __stacker_switch_stacks(stack.as_mut_ptr() as usize + stack_size - offset, - doit as usize as *const _, - &mut f as *mut &mut FnMut() as *mut u8); - - // Once we've returned reset bothe stack limits and then return value same - // value the closure returned. - set_stack_limit(old_limit); - - unsafe extern fn doit(f: &mut &mut FnMut()) { - f(); + fn set_stack_limit(l: usize) { + STACK_LIMIT.with(|s| s.set(l)) + } + + fn _grow(stack_size: usize, mut f: &mut FnMut()) { + // Align to 16-bytes (see below for why) + let stack_size = (stack_size + 15) / 16 * 16; + + // Allocate some new stack for ourselves + let mut stack = Vec::::with_capacity(stack_size); + let new_limit = stack.as_ptr() as usize + 32 * 1024; + + // Save off the old stack limits + let old_limit = get_stack_limit(); + + // Prepare stack limits for the stack switch + set_stack_limit(new_limit); + + // Make sure the stack is 16-byte aligned which should be enough for all + // platforms right now. Allocations on 64-bit are already 16-byte aligned + // and our switching routine doesn't push any other data, but the routine on + // 32-bit pushes an argument so we need a bit of an offset to get it 16-byte + // aligned when the call is made. + let offset = if cfg!(target_pointer_width = "32") { + 12 + } else { + 0 + }; + + unsafe { + __stacker_switch_stacks(&mut f as *mut &mut FnMut() as *mut u8, + doit as usize as *const _, + stack.as_mut_ptr() as usize + stack_size - offset); + } + + // Once we've returned reset both stack limits and then return the same + // value the closure returned. + set_stack_limit(old_limit); + } } } +extern fn doit(f: &mut &mut FnMut()) { + f(); +} + cfg_if! { if #[cfg(windows)] { - // See this for where all this logic is coming from.
- // - // https://github.com/adobe/webkit/blob/0441266/Source/WTF/wtf - // /StackBounds.cpp - unsafe fn guess_os_stack_limit() -> usize { - #[cfg(target_pointer_width = "32")] - extern { - #[link_name = "__stacker_get_tib_32"] - fn get_tib_address() -> *const usize; + extern { + fn __stacker_get_current_fiber() -> winapi::PVOID; + } + + #[no_mangle] + pub unsafe extern fn __stacker_switch_stacks_callback(f: &mut &mut FnMut()) { + f(); + } + + struct FiberInfo<'a> { + callback: &'a mut FnMut(), + result: Option>, + parent_fiber: winapi::LPVOID, + } + + unsafe extern "system" fn fiber_proc(info: winapi::LPVOID) { + let info = &mut *(info as *mut FiberInfo); + info.result = Some(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + (info.callback)(); + }))); + kernel32::SwitchToFiber(info.parent_fiber); + return; + } + + fn _grow(stack_size: usize, callback: &mut FnMut()) { + unsafe { + let was_fiber = kernel32::IsThreadAFiber() == winapi::TRUE; + + let mut info = FiberInfo { + callback, + result: None, + parent_fiber: if was_fiber { + __stacker_get_current_fiber() + } else { + kernel32::ConvertThreadToFiber(0i32 as _) + }, + }; + if info.parent_fiber == 0i32 as _ { + panic!("Unable to convert thread to fiber"); + } + let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); + if fiber == 0i32 as _ { + panic!("Unable to allocate fiber"); + } + kernel32::SwitchToFiber(fiber); + kernel32::DeleteFiber(fiber); + + if !was_fiber { + kernel32::ConvertFiberToThread(); + } + + if let Err(payload) = info.result.unwrap() { + std::panic::resume_unwind(payload); + } } - #[cfg(target_pointer_width = "64")] - extern "system" { - #[cfg_attr(target_env = "msvc", link_name = "NtCurrentTeb")] - #[cfg_attr(target_env = "gnu", link_name = "__stacker_get_tib_64")] - fn get_tib_address() -> *const usize; + } + + cfg_if! { + if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { + extern { + fn __stacker_get_stack_limit() -> usize; + } + + #[inline(always)] + fn get_stack_limit() -> usize { + unsafe { + __stacker_get_stack_limit() + } + } + } else { + #[inline(always)] + fn get_thread_stack_guarantee() -> usize { + let min_guarantee = if cfg!(target_pointer_width = "32") { + 0x1000 + } else { + 0x2000 + }; + let mut stack_guarantee = 0; + unsafe { + kernel32::SetThreadStackGuarantee(&mut stack_guarantee) + }; + std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000 + } + + #[inline(always)] + fn get_stack_limit() -> usize { + let mut mi; + unsafe { + kernel32::VirtualQuery(&mut () as *mut (), &mut mi, std::mem::size_of_val(&mi)); + } + mi.AllocationBase + get_thread_stack_guarantee() + 0x1000 + } } - // https://en.wikipedia.org/wiki/Win32_Thread_Information_Block for - // the struct layout of the 32-bit TIB. 
It looks like the struct - // layout of the 64-bit TIB is also the same for getting the stack - // limit: http://doxygen.reactos.org/d3/db0/structNT__TIB64.html - *get_tib_address().offset(2) } } else if #[cfg(target_os = "linux")] { use std::mem; diff --git a/tests/smoke.rs b/tests/smoke.rs index ea42149..80d12cf 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -26,7 +26,6 @@ fn deep() { } #[test] -#[ignore] fn panic() { fn foo(n: usize, s: &mut [u8]) { unsafe { __stacker_black_box(s.as_ptr()); } @@ -49,3 +48,40 @@ fn panic() { assert!(rx.recv().is_err()); } + +fn recursive(n: usize, f: F) -> usize { + if n > 0 { + stacker::grow(64 * 1024, || { + recursive(n - 1, f) + 1 + }) + } else { + f(); + 0 + } +} + +#[test] +fn catch_panic() { + let panic_result = std::panic::catch_unwind(|| { + recursive(100, || panic!()); + }); + assert!(panic_result.is_err()); +} + +#[test] +fn catch_panic_inside() { + stacker::grow(64 * 1024, || { + let panic_result = std::panic::catch_unwind(|| { + recursive(100, || panic!()); + }); + assert!(panic_result.is_err()); + }); +} + +#[test] +fn catch_panic_leaf() { + stacker::grow(64 * 1024, || { + let panic_result = std::panic::catch_unwind(|| panic!()); + assert!(panic_result.is_err()); + }); +} \ No newline at end of file From 37f1f8aad63cb97a0a433f7fc382c17c7dbd106f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Tue, 31 Oct 2017 23:59:05 +0100 Subject: [PATCH 02/17] Use mmap and create a guard page --- src/lib.rs | 80 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 64 insertions(+), 16 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index d242a0c..dd2b5f8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,6 +82,7 @@ cfg_if! { fn __stacker_switch_stacks(dataptr: *mut u8, fnptr: *const u8, new_stack: usize); + fn getpagesize() -> libc::c_int; } thread_local! { @@ -99,19 +100,70 @@ cfg_if! 
{ STACK_LIMIT.with(|s| s.set(l)) } + struct StackSwitch { + map: *mut libc::c_void, + stack_size: usize, + old_stack_limit: usize, + } + + impl Drop for StackSwitch { + fn drop(&mut self) { + unsafe { + libc::munmap(self.map, self.stack_size); + } + set_stack_limit(self.old_stack_limit); + } + } + fn _grow(stack_size: usize, mut f: &mut FnMut()) { - // Align to 16-bytes (see below for why) - let stack_size = (stack_size + 15) / 16 * 16; + let page_size = unsafe { getpagesize() } as usize; - // Allocate some new stack for oureslves - let mut stack = Vec::::with_capacity(stack_size); - let new_limit = stack.as_ptr() as usize + 32 * 1024; + // Round the stack size up to a multiple of page_size + let rem = stack_size % page_size; + let stack_size = if rem == 0 { + stack_size + } else { + stack_size.checked_add((page_size - rem)) + .expect("stack size calculation overflowed") + }; - // Save off the old stack limits - let old_limit = get_stack_limit(); + // We need at least 2 page + let stack_size = std::cmp::max(stack_size, page_size); + + // Add a guard page + let stack_size = stack_size.checked_add(page_size) + .expect("stack size calculation overflowed"); + + // Allocate some new stack for ourselves + let map = unsafe { + libc::mmap(std::ptr::null_mut(), + stack_size, + libc::PROT_NONE, + libc::MAP_PRIVATE | + libc::MAP_ANON, + 0, + 0) + }; + if map == -1isize as _ { + panic!("unable to allocate stack") + } + let _switch = StackSwitch { + map, + stack_size, + old_stack_limit: get_stack_limit(), + }; + let result = unsafe { + libc::mprotect((map as usize + page_size) as *mut libc::c_void, + stack_size - page_size, + libc::PROT_READ | libc::PROT_WRITE) + }; + if result == -1 { + panic!("unable to set stack permissions") + } + let stack_low = map as usize; // Prepare stack limits for the stack switch - set_stack_limit(new_limit); + set_stack_limit(stack_low); // Make sure the stack is 16-byte aligned which should be enough for all // platforms right now. Allocations on 64-bit are already 16-byte aligned @@ -127,12 +179,10 @@ cfg_if! { unsafe { __stacker_switch_stacks(&mut f as *mut &mut FnMut() as *mut u8, doit as usize as *const _, - stack.as_mut_ptr() as usize + stack_size - offset); + stack_low + stack_size - offset); } - // Once we've returned reset bothe stack limits and then return value same - // value the closure returned. - set_stack_limit(old_limit); + // Dropping `switch` frees the memory mapping and restores the old stack limit } } } @@ -181,11 +231,11 @@ cfg_if! { }, }; if info.parent_fiber == 0i32 as _ { - panic!("Unable to convert thread to fiber"); + panic!("unable to convert thread to fiber"); } let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); if fiber == 0i32 as _ { - panic!("Unable to allocate fiber"); + panic!("unable to allocate fiber"); } kernel32::SwitchToFiber(fiber); kernel32::DeleteFiber(fiber); @@ -253,8 +303,6 @@ cfg_if! 
{ stackaddr as usize } } else if #[cfg(target_os = "macos")] { - use libc::{c_void, pthread_t, size_t}; - unsafe fn guess_os_stack_limit() -> usize { libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize - libc::pthread_get_stacksize_np(libc::pthread_self()) as usize From 1dd42811a99645ee5d8ca56a36684468ab972971 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Fri, 16 Nov 2018 10:24:44 +0100 Subject: [PATCH 03/17] Add some integration tests --- tests/simple.rs | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 tests/simple.rs diff --git a/tests/simple.rs b/tests/simple.rs new file mode 100644 index 0000000..4e4c46f --- /dev/null +++ b/tests/simple.rs @@ -0,0 +1,24 @@ +extern crate stacker; + +const RED_ZONE: usize = 100*1024; // 100k +const STACK_PER_RECURSION: usize = 1 * 1024 * 1024; // 1MB + +pub fn ensure_sufficient_stack R + std::panic::UnwindSafe>( + f: F +) -> R { + stacker::maybe_grow(RED_ZONE, STACK_PER_RECURSION, f) +} + +#[inline(never)] +fn recurse(n: usize) { + let x = [42u8; 50000]; + if n != 0 { + ensure_sufficient_stack(|| recurse(n - 1)); + } + drop(x); +} + +#[test] +fn foo() { + recurse(10000); +} \ No newline at end of file From 687c5fc2176243805dc92b3e46d6316646e4bf6f Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 08:28:45 +0100 Subject: [PATCH 04/17] Reintroduce `__stacker_stack_pointer` --- src/arch/i686.S | 4 ++++ src/arch/x86_64.S | 3 +++ src/lib.rs | 12 ++++++++---- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/arch/i686.S b/src/arch/i686.S index da1ff88..687d0c4 100644 --- a/src/arch/i686.S +++ b/src/arch/i686.S @@ -2,6 +2,10 @@ .text +GLOBAL(__stacker_stack_pointer): + mov %esp, %eax + ret + GLOBAL(__stacker_black_box): ret diff --git a/src/arch/x86_64.S b/src/arch/x86_64.S index a82d73e..744609f 100644 --- a/src/arch/x86_64.S +++ b/src/arch/x86_64.S @@ -4,6 +4,9 @@ GLOBAL(__stacker_black_box): ret +GLOBAL(__stacker_stack_pointer): + movq %rsp, %rax + ret GLOBAL(__stacker_switch_stacks): .cfi_startproc diff --git a/src/lib.rs b/src/lib.rs index dd2b5f8..f03bb8c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -52,13 +52,17 @@ pub fn maybe_grow R>(red_zone: usize, } } +extern { + fn __stacker_stack_pointer() -> usize; +} + /// Queries the amount of remaining stack as interpreted by this library. /// /// This function will return the amount of stack space left which will be used /// to determine whether a stack switch should be made or not. #[inline(always)] pub fn remaining_stack() -> usize { - &mut () as *mut _ as usize - get_stack_limit() + unsafe { __stacker_stack_pointer() - get_stack_limit() } } /// Always creates a new stack for the passed closure to run on. @@ -146,7 +150,7 @@ cfg_if! { }; if map == -1isize as _ { panic!("unable to allocate stack") - } + } let _switch = StackSwitch { map, stack_size, @@ -220,7 +224,7 @@ cfg_if! { fn _grow(stack_size: usize, callback: &mut FnMut()) { unsafe { let was_fiber = kernel32::IsThreadAFiber() == winapi::TRUE; - + let mut info = FiberInfo { callback, result: None, @@ -281,7 +285,7 @@ cfg_if! 
{ fn get_stack_limit() -> usize { let mut mi; unsafe { - kernel32::VirtualQuery(&mut () as *mut (), &mut mi, std::mem::size_of_val(&mi)); + kernel32::VirtualQuery(__stacker_stack_pointer(), &mut mi, std::mem::size_of_val(&mi)); } mi.AllocationBase + get_thread_stack_guarantee() + 0x1000 } From 141d742ab59185edbec5a4b21cfd143671998284 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 09:39:44 +0100 Subject: [PATCH 05/17] Explain magic numbers --- src/arch/windows.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/arch/windows.c b/src/arch/windows.c index da1b14a..1b894c1 100644 --- a/src/arch/windows.c +++ b/src/arch/windows.c @@ -8,10 +8,13 @@ PVOID __stacker_get_current_fiber() { static size_t calc_stack_limit(size_t stack_low, size_t stack_guarantee) { return stack_low + - max(stack_guarantee, sizeof(void *) == 4 ? 0x1000 : 0x2000) + // The guaranteed pages on a stack overflow + max(stack_guarantee, sizeof(void *) == 4 ? 0x1000 : 0x2000) + // The guaranteed pages on a stack overflow 0x1000; // The guard page } +// Fast paths for x86 +// magic numbers are from https://en.wikipedia.org/wiki/Win32_Thread_Information_Block + #if defined(_M_X64) size_t __stacker_get_stack_limit() { return calc_stack_limit(__readgsqword(0x1478), // The base address of the stack. Referenced in GetCurrentThreadStackLimits @@ -24,4 +27,4 @@ size_t __stacker_get_stack_limit() { return calc_stack_limit(__readfsdword(0xE0C), // The base address of the stack. Referenced in GetCurrentThreadStackLimits __readfsdword(0xF78)); // The guaranteed pages on a stack overflow. Referenced in SetThreadStackGuarantee } -#endif \ No newline at end of file +#endif From 7778a6c591cee8bf2c7f4b5d3ae0bc0499e8e4e6 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 10:18:04 +0100 Subject: [PATCH 06/17] Reintroduce windows asm for `__stacker_stack_pointer` --- build.rs | 21 +++++++++------------ src/arch/i686.asm | 8 ++++++++ src/arch/x86_64.asm | 6 ++++++ 3 files changed, 23 insertions(+), 12 deletions(-) create mode 100644 src/arch/i686.asm create mode 100644 src/arch/x86_64.asm diff --git a/build.rs b/build.rs index 3f23d29..89ca9f7 100644 --- a/build.rs +++ b/build.rs @@ -4,6 +4,7 @@ use std::env; fn main() { let target = env::var("TARGET").unwrap(); + let msvc = target.contains("msvc"); let mut cfg = cc::Build::new(); @@ -18,19 +19,15 @@ fn main() { stacker: {}\n\n", target); } - if target.contains("windows") { - cfg.file("src/arch/windows.c"); + if target.starts_with("x86_64") { + cfg.file(if msvc {"src/arch/x86_64.asm"} else {"src/arch/x86_64.S"}); + cfg.define("X86_64", None); + } else if target.contains("i686") { + cfg.file(if msvc {"src/arch/i686.asm"} else {"src/arch/i686.S"}); + cfg.define("X86", None); } else { - if target.starts_with("x86_64") { - cfg.file("src/arch/x86_64.S"); - cfg.define("X86_64", None); - } else if target.contains("i686") { - cfg.file("src/arch/i686.S"); - cfg.define("X86", None); - } else { - panic!("\n\nusing currently unsupported target triple with \ - stacker: {}\n\n", target); - } + panic!("\n\nusing currently unsupported target triple with \ + stacker: {}\n\n", target); } cfg.include("src/arch").compile("libstacker.a"); diff --git a/src/arch/i686.asm b/src/arch/i686.asm new file mode 100644 index 0000000..664080d --- /dev/null +++ b/src/arch/i686.asm @@ -0,0 +1,8 @@ +.586 +.MODEL FLAT, C +.CODE + +__stacker_stack_pointer PROC + MOV EAX, ESP + RET +__stacker_stack_pointer ENDP diff --git a/src/arch/x86_64.asm b/src/arch/x86_64.asm new 
file mode 100644 index 0000000..9be61b4 --- /dev/null +++ b/src/arch/x86_64.asm @@ -0,0 +1,6 @@ +_text SEGMENT + +__stacker_stack_pointer PROC + MOV RAX, RSP + RET +__stacker_stack_pointer ENDP From e96b9ef7df5d58a57831e7e9691f8b0d514bcdde Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 10:32:35 +0100 Subject: [PATCH 07/17] Implement windows without C --- src/arch/windows.c | 30 ------------------- src/lib.rs | 73 +++++++++++++++++++--------------------------- 2 files changed, 30 insertions(+), 73 deletions(-) delete mode 100644 src/arch/windows.c diff --git a/src/arch/windows.c b/src/arch/windows.c deleted file mode 100644 index 1b894c1..0000000 --- a/src/arch/windows.c +++ /dev/null @@ -1,30 +0,0 @@ -#include - -void __stacker_black_box() {} - -PVOID __stacker_get_current_fiber() { - return GetCurrentFiber(); -} - -static size_t calc_stack_limit(size_t stack_low, size_t stack_guarantee) { - return stack_low + - max(stack_guarantee, sizeof(void *) == 4 ? 0x1000 : 0x2000) + // The guaranteed pages on a stack overflow - 0x1000; // The guard page -} - -// Fast paths for x86 -// magic numbers are from https://en.wikipedia.org/wiki/Win32_Thread_Information_Block - -#if defined(_M_X64) -size_t __stacker_get_stack_limit() { - return calc_stack_limit(__readgsqword(0x1478), // The base address of the stack. Referenced in GetCurrentThreadStackLimits - __readgsqword(0x1748)); // The guaranteed pages on a stack overflow. Referenced in SetThreadStackGuarantee -} -#endif - -#ifdef _M_IX86 -size_t __stacker_get_stack_limit() { - return calc_stack_limit(__readfsdword(0xE0C), // The base address of the stack. Referenced in GetCurrentThreadStackLimits - __readfsdword(0xF78)); // The guaranteed pages on a stack overflow. Referenced in SetThreadStackGuarantee -} -#endif diff --git a/src/lib.rs b/src/lib.rs index f03bb8c..2a27553 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -32,6 +32,8 @@ extern crate kernel32; #[cfg(windows)] extern crate winapi; +use std::cell::Cell; + /// Grows the call stack if necessary. /// /// This function is intended to be called at manually instrumented points in a @@ -78,10 +80,23 @@ pub fn grow R>(stack_size: usize, f: F) -> R { ret.unwrap() } +thread_local! { + static STACK_LIMIT: Cell = Cell::new(unsafe { + guess_os_stack_limit() + }) +} + +#[inline(always)] +fn get_stack_limit() -> usize { + STACK_LIMIT.with(|s| s.get()) +} + +fn set_stack_limit(l: usize) { + STACK_LIMIT.with(|s| s.set(l)) +} + cfg_if! { if #[cfg(not(windows))] { - use std::cell::Cell; - extern { fn __stacker_switch_stacks(dataptr: *mut u8, fnptr: *const u8, @@ -89,21 +104,6 @@ cfg_if! { fn getpagesize() -> libc::c_int; } - thread_local! { - static STACK_LIMIT: Cell = Cell::new(unsafe { - guess_os_stack_limit() - }) - } - - #[inline(always)] - fn get_stack_limit() -> usize { - STACK_LIMIT.with(|s| s.get()) - } - - fn set_stack_limit(l: usize) { - STACK_LIMIT.with(|s| s.set(l)) - } - struct StackSwitch { map: *mut libc::c_void, stack_size: usize, @@ -180,6 +180,10 @@ cfg_if! { 0 }; + extern fn doit(f: &mut &mut FnMut()) { + f(); + } + unsafe { __stacker_switch_stacks(&mut f as *mut &mut FnMut() as *mut u8, doit as usize as *const _, @@ -191,21 +195,8 @@ cfg_if! { } } -extern fn doit(f: &mut &mut FnMut()) { - f(); -} - cfg_if! 
{ if #[cfg(windows)] { - extern { - fn __stacker_get_current_fiber() -> winapi::PVOID; - } - - #[no_mangle] - pub unsafe extern fn __stacker_switch_stacks_callback(f: &mut &mut FnMut()) { - f(); - } - struct FiberInfo<'a> { callback: &'a mut FnMut(), result: Option>, @@ -229,7 +220,10 @@ cfg_if! { callback, result: None, parent_fiber: if was_fiber { - __stacker_get_current_fiber() + extern { + fn GetCurrentFiber() -> *mut winapi::c_void; + } + GetCurrentFiber() } else { kernel32::ConvertThreadToFiber(0i32 as _) }, @@ -237,6 +231,7 @@ cfg_if! { if info.parent_fiber == 0i32 as _ { panic!("unable to convert thread to fiber"); } + set_stack_limit(stack_size); let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); if fiber == 0i32 as _ { panic!("unable to allocate fiber"); @@ -256,15 +251,9 @@ cfg_if! { cfg_if! { if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { - extern { - fn __stacker_get_stack_limit() -> usize; - } - #[inline(always)] - fn get_stack_limit() -> usize { - unsafe { - __stacker_get_stack_limit() - } + unsafe fn guess_os_stack_limit() -> usize { + unimplemented!() } } else { #[inline(always)] @@ -282,11 +271,9 @@ cfg_if! { } #[inline(always)] - fn get_stack_limit() -> usize { + unsafe fn guess_os_stack_limit() -> usize { let mut mi; - unsafe { - kernel32::VirtualQuery(__stacker_stack_pointer(), &mut mi, std::mem::size_of_val(&mi)); - } + kernel32::VirtualQuery(__stacker_stack_pointer(), &mut mi, std::mem::size_of_val(&mi)); mi.AllocationBase + get_thread_stack_guarantee() + 0x1000 } } From 4ebbeca3d375ea2de72a6c93cfb048a2f866dc93 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 10:33:02 +0100 Subject: [PATCH 08/17] Remove unnecessary parentheses --- src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 2a27553..386b559 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -127,7 +127,7 @@ cfg_if! { let stack_size = if rem == 0 { stack_size } else { - stack_size.checked_add((page_size - rem)) + stack_size.checked_add(page_size - rem) .expect("stack size calculation overflowed") }; From da91897dc5e7a7ca0dd3dcc401da1d2df910c3cc Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 11:08:30 +0100 Subject: [PATCH 09/17] Overallocate on platforms without stack size probing support --- src/lib.rs | 51 +++++++++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 22 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 386b559..6408bcb 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,10 +44,15 @@ use std::cell::Cell; /// The closure `f` is guaranteed to run on a stack with at least `red_zone` /// bytes, and it will be run on the current stack if there's space available. #[inline(always)] -pub fn maybe_grow R>(red_zone: usize, - stack_size: usize, - f: F) -> R { - if remaining_stack() >= red_zone { +pub fn maybe_grow R>( + red_zone: usize, + stack_size: usize, + f: F, +) -> R { + // if we can't guess the remaining stack (unsupported on some platforms) + // we immediately grow the stack and then cache the new stack size (which + // we do know now because we know by how much we grew the stack) + if remaining_stack().map_or(false, |remaining| remaining >= red_zone) { f() } else { grow(stack_size, f) @@ -63,8 +68,8 @@ extern { /// This function will return the amount of stack space left which will be used /// to determine whether a stack switch should be made or not. 
#[inline(always)] -pub fn remaining_stack() -> usize { - unsafe { __stacker_stack_pointer() - get_stack_limit() } +pub fn remaining_stack() -> Option { + get_stack_limit().map(|limit| unsafe { __stacker_stack_pointer() - limit }) } /// Always creates a new stack for the passed closure to run on. @@ -81,18 +86,18 @@ pub fn grow R>(stack_size: usize, f: F) -> R { } thread_local! { - static STACK_LIMIT: Cell = Cell::new(unsafe { + static STACK_LIMIT: Cell> = Cell::new(unsafe { guess_os_stack_limit() }) } #[inline(always)] -fn get_stack_limit() -> usize { +fn get_stack_limit() -> Option { STACK_LIMIT.with(|s| s.get()) } fn set_stack_limit(l: usize) { - STACK_LIMIT.with(|s| s.set(l)) + STACK_LIMIT.with(|s| s.set(Some(l))) } cfg_if! { @@ -107,7 +112,7 @@ cfg_if! { struct StackSwitch { map: *mut libc::c_void, stack_size: usize, - old_stack_limit: usize, + old_stack_limit: Option, } impl Drop for StackSwitch { @@ -115,7 +120,9 @@ cfg_if! { unsafe { libc::munmap(self.map, self.stack_size); } - set_stack_limit(self.old_stack_limit); + if let Some(limit) = self.old_stack_limit { + set_stack_limit(limit); + } } } @@ -252,8 +259,8 @@ cfg_if! { cfg_if! { if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { #[inline(always)] - unsafe fn guess_os_stack_limit() -> usize { - unimplemented!() + unsafe fn guess_os_stack_limit() -> Option { + None } } else { #[inline(always)] @@ -271,17 +278,17 @@ cfg_if! { } #[inline(always)] - unsafe fn guess_os_stack_limit() -> usize { + unsafe fn guess_os_stack_limit() -> Option { let mut mi; kernel32::VirtualQuery(__stacker_stack_pointer(), &mut mi, std::mem::size_of_val(&mi)); - mi.AllocationBase + get_thread_stack_guarantee() + 0x1000 + Some(mi.AllocationBase + get_thread_stack_guarantee() + 0x1000) } } } } else if #[cfg(target_os = "linux")] { use std::mem; - unsafe fn guess_os_stack_limit() -> usize { + unsafe fn guess_os_stack_limit() -> Option { let mut attr: libc::pthread_attr_t = mem::zeroed(); assert_eq!(libc::pthread_attr_init(&mut attr), 0); assert_eq!(libc::pthread_getattr_np(libc::pthread_self(), @@ -291,16 +298,16 @@ cfg_if! { assert_eq!(libc::pthread_attr_getstack(&attr, &mut stackaddr, &mut stacksize), 0); assert_eq!(libc::pthread_attr_destroy(&mut attr), 0); - stackaddr as usize + Some(stackaddr as usize) } } else if #[cfg(target_os = "macos")] { - unsafe fn guess_os_stack_limit() -> usize { - libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize - - libc::pthread_get_stacksize_np(libc::pthread_self()) as usize + unsafe fn guess_os_stack_limit() -> Option { + Some(libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize - + libc::pthread_get_stacksize_np(libc::pthread_self()) as usize) } } else { - unsafe fn guess_os_stack_limit() -> usize { - panic!("cannot guess the stack limit on this platform"); + unsafe fn guess_os_stack_limit() -> Option { + None } } } From 76eb7677ec2213137039d7a9116963eadb0ad474 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Wed, 28 Nov 2018 12:40:46 +0100 Subject: [PATCH 10/17] Document a few more function calls --- src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index 6408bcb..ef96a69 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -223,6 +223,7 @@ cfg_if! { unsafe { let was_fiber = kernel32::IsThreadAFiber() == winapi::TRUE; + // Fibers are essentially stackfull coroutines let mut info = FiberInfo { callback, result: None, @@ -243,7 +244,9 @@ cfg_if! 
{ if fiber == 0i32 as _ { panic!("unable to allocate fiber"); } + // switch to fiber and immediately execute kernel32::SwitchToFiber(fiber); + // fiber execution finished, we can safely delete it now kernel32::DeleteFiber(fiber); if !was_fiber { kernel32::ConvertFiberToThread(); } From 0d67572843bdb867e81ae7fa28a548780e3487d8 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Thu, 29 Nov 2018 14:37:05 +0100 Subject: [PATCH 11/17] Document some more things --- src/lib.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index ef96a69..31b2076 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -239,6 +239,9 @@ cfg_if! { if info.parent_fiber == 0i32 as _ { panic!("unable to convert thread to fiber"); } + // remember the old stack limit + let old_stack_limit = get_stack_limit(); + // bump the known stack size in the thread local set_stack_limit(stack_size); let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); if fiber == 0i32 as _ { panic!("unable to allocate fiber"); } @@ -249,6 +252,11 @@ cfg_if! { // fiber execution finished, we can safely delete it now kernel32::DeleteFiber(fiber); + // restore the old stack limit + if let Some(old) = old_stack_limit { + set_stack_limit(old); + } + if !was_fiber { kernel32::ConvertFiberToThread(); } @@ -262,6 +270,7 @@ cfg_if! { cfg_if! { if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { #[inline(always)] + // We cannot know the initial stack size on x86 unsafe fn guess_os_stack_limit() -> Option { None } @@ -309,6 +318,9 @@ cfg_if! { libc::pthread_get_stacksize_np(libc::pthread_self()) as usize) } } else { + // fallback for other platforms is to always increase the stack if we're on + // the root stack. After we increased the stack once, we know the new stack + // size and don't need this pessimization anymore unsafe fn guess_os_stack_limit() -> Option { None } From a8875a4a8fb8eb0deffbd34f5e2d5e0f2b76098a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20K=C3=A5re=20Alsaker?= Date: Mon, 3 Dec 2018 11:29:14 +0100 Subject: [PATCH 12/17] Fix things --- build.rs | 1 + src/arch/i686.S | 9 ++-- src/arch/i686.asm | 2 + src/arch/windows.c | 7 +++ src/arch/x86_64.S | 9 ++-- src/arch/x86_64.asm | 2 + src/lib.rs | 128 ++++++++++++++++++++++++++------------------ 7 files changed, 101 insertions(+), 57 deletions(-) create mode 100644 src/arch/windows.c diff --git a/build.rs b/build.rs index 89ca9f7..132053f 100644 --- a/build.rs +++ b/build.rs @@ -14,6 +14,7 @@ fn main() { cfg.define("APPLE", None); } else if target.contains("windows") { cfg.define("WINDOWS", None); + cfg.file("src/arch/windows.c"); } else { panic!("\n\nusing currently unsupported target triple with \ stacker: {}\n\n", target); } diff --git a/src/arch/i686.S b/src/arch/i686.S index 687d0c4..8a08708 100644 --- a/src/arch/i686.S +++ b/src/arch/i686.S @@ -10,12 +10,15 @@ GLOBAL(__stacker_black_box): ret GLOBAL(__stacker_switch_stacks): + // CFI instructions tell the unwinder how to unwind this function + // This enables unwinding through our extended stacks and also + // backtraces .cfi_startproc push %ebp - .cfi_def_cfa_offset 8 - .cfi_offset ebp, -8 + .cfi_def_cfa_offset 8 // restore esp by adding 8 + .cfi_offset ebp, -8 // restore ebp from the stack mov %esp, %ebp - .cfi_def_cfa_register ebp + .cfi_def_cfa_register ebp // restore esp from ebp mov 16(%ebp), %esp // switch to our new stack mov 12(%ebp), %eax // load function we're going to call push 8(%ebp) // push argument to first function diff --git a/src/arch/i686.asm b/src/arch/i686.asm index 664080d..425d42c 100644 ---
a/src/arch/i686.asm +++ b/src/arch/i686.asm @@ -6,3 +6,5 @@ __stacker_stack_pointer PROC MOV EAX, ESP RET __stacker_stack_pointer ENDP + +END diff --git a/src/arch/windows.c b/src/arch/windows.c new file mode 100644 index 0000000..e1646e4 --- /dev/null +++ b/src/arch/windows.c @@ -0,0 +1,7 @@ +#include + +void __stacker_black_box() {} + +PVOID __stacker_get_current_fiber() { + return GetCurrentFiber(); +} diff --git a/src/arch/x86_64.S b/src/arch/x86_64.S index 744609f..1e42d84 100644 --- a/src/arch/x86_64.S +++ b/src/arch/x86_64.S @@ -9,12 +9,15 @@ GLOBAL(__stacker_stack_pointer): ret GLOBAL(__stacker_switch_stacks): + // CFI instructions tell the unwinder how to unwind this function + // This enables unwinding through our extended stacks and also + // backtraces .cfi_startproc push %rbp - .cfi_def_cfa_offset 16 - .cfi_offset rbp, -16 + .cfi_def_cfa_offset 16 // restore rsp by adding 16 + .cfi_offset rbp, -16 // restore rbp from the stack mov %rsp, %rbp - .cfi_def_cfa_register rbp + .cfi_def_cfa_register rbp // restore rsp from rbp mov %rdx, %rsp // switch to our new stack call *%rsi // call our function pointer, data argument in %rdi mov %rbp, %rsp // restore the old stack pointer diff --git a/src/arch/x86_64.asm b/src/arch/x86_64.asm index 9be61b4..ad5f470 100644 --- a/src/arch/x86_64.asm +++ b/src/arch/x86_64.asm @@ -4,3 +4,5 @@ __stacker_stack_pointer PROC MOV RAX, RSP RET __stacker_stack_pointer ENDP + +END \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 31b2076..2081476 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -96,8 +96,8 @@ fn get_stack_limit() -> Option { STACK_LIMIT.with(|s| s.get()) } -fn set_stack_limit(l: usize) { - STACK_LIMIT.with(|s| s.set(Some(l))) +fn set_stack_limit(l: Option) { + STACK_LIMIT.with(|s| s.set(l)) } cfg_if! { @@ -120,9 +120,7 @@ cfg_if! { unsafe { libc::munmap(self.map, self.stack_size); } - if let Some(limit) = self.old_stack_limit { - set_stack_limit(limit); - } + set_stack_limit(self.old_stack_limit); } } @@ -174,7 +172,7 @@ cfg_if! { let stack_low = map as usize; // Prepare stack limits for the stack switch - set_stack_limit(stack_low); + set_stack_limit(Some(stack_low)); // Make sure the stack is 16-byte aligned which should be enough for all // platforms right now. Allocations on 64-bit are already 16-byte aligned @@ -204,6 +202,10 @@ cfg_if! { cfg_if! { if #[cfg(windows)] { + extern { + fn __stacker_get_current_fiber() -> winapi::PVOID; + } + struct FiberInfo<'a> { callback: &'a mut FnMut(), result: Option>, @@ -212,51 +214,72 @@ cfg_if! { unsafe extern "system" fn fiber_proc(info: winapi::LPVOID) { let info = &mut *(info as *mut FiberInfo); + + // Remember the old stack limit + let old_stack_limit = get_stack_limit(); + // Update the limit to that of the fiber stack + set_stack_limit(guess_os_stack_limit()); + info.result = Some(std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { (info.callback)(); }))); + + // Restore the stack limit of the previous fiber + set_stack_limit(old_stack_limit); + kernel32::SwitchToFiber(info.parent_fiber); return; } fn _grow(stack_size: usize, callback: &mut FnMut()) { unsafe { + // Fibers (or stackful coroutines) are the only way to create new stacks on the + // same thread on Windows. So in order to extend the stack we create a fiber + // and switch to it so we can use its stack. After running + // `callback` we switch back to the current stack and destroy + // the fiber and its associated stack.
+ let was_fiber = kernel32::IsThreadAFiber() == winapi::TRUE; - // Fibers are essentially stackfull coroutines let mut info = FiberInfo { callback, result: None, - parent_fiber: if was_fiber { - extern { - fn GetCurrentFiber() -> *mut winapi::c_void; + + // We need a handle to the current stack / fiber so we can switch back + parent_fiber: { + // Is the current thread already a fiber? This is the case when we already + // used a fiber to extend the stack + if was_fiber { + // Get a handle to the current fiber. We need to use C for this + // as GetCurrentFiber is a header-only function. + __stacker_get_current_fiber() + } else { + // Convert the current thread to a fiber, so we are able to switch back + // to the current stack. Threads converted to fibers still act like + // regular threads, but they have associated fiber data. We later + // convert it back to a regular thread and free the fiber data. + kernel32::ConvertThreadToFiber(0i32 as _) } - GetCurrentFiber() - } else { - kernel32::ConvertThreadToFiber(0i32 as _) }, }; if info.parent_fiber == 0i32 as _ { + // We don't have a handle to the fiber, so we can't switch back panic!("unable to convert thread to fiber"); } - // remember the old stack limit - let old_stack_limit = get_stack_limit(); - // bump the known stack size in the thread local - set_stack_limit(stack_size); + let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); if fiber == 0i32 as _ { panic!("unable to allocate fiber"); } - // switch to fiber and immediately execute + + // Switch to the fiber we created. This changes stacks and starts executing + // fiber_proc on it. fiber_proc will run `callback` and then switch back kernel32::SwitchToFiber(fiber); - // fiber execution finished, we can safely delete it now - kernel32::DeleteFiber(fiber); - // restore the old stack limit - if let Some(old) = old_stack_limit { - set_stack_limit(old); - } + // We are back on the old stack and now we have to destroy the fiber and its stack + kernel32::DeleteFiber(fiber); + // If the if !was_fiber { kernel32::ConvertFiberToThread(); } @@ -267,35 +290,38 @@ cfg_if! { } } - cfg_if! { - if #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] { - #[inline(always)] - // We cannot know the initial stack size on x86 - unsafe fn guess_os_stack_limit() -> Option { - None - } + #[inline(always)] + fn get_thread_stack_guarantee() -> usize { + let min_guarantee = if cfg!(target_pointer_width = "32") { + 0x1000 } else { - #[inline(always)] - fn get_thread_stack_guarantee() -> usize { - let min_guarantee = if cfg!(target_pointer_width = "32") { - 0x1000 - } else { - 0x2000 - }; - let mut stack_guarantee = 0; - unsafe { - kernel32::SetThreadStackGuarantee(&mut stack_guarantee) - }; - std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000 - } + 0x2000 + }; + let mut stack_guarantee = 0; + unsafe { + // Read the current thread stack guarantee + // This is the stack reserved for stack overflow + // exception handling. + // This doesn't return the true value so we need + // some further logic to calculate the real stack + // guarantee. This logic is what is used on x86-32 and + // x86-64 Windows 10.
Other versions and platforms may differ + kernel32::SetThreadStackGuarantee(&mut stack_guarantee) + }; + std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000 + } - #[inline(always)] - unsafe fn guess_os_stack_limit() -> Option { - let mut mi; - kernel32::VirtualQuery(__stacker_stack_pointer(), &mut mi, std::mem::size_of_val(&mi)); - Some(mi.AllocationBase + get_thread_stack_guarantee() + 0x1000) - } - } + #[inline(always)] + unsafe fn guess_os_stack_limit() -> Option { + let mut mi = std::mem::zeroed(); + // Query the allocation which contains our stack pointer in order + // to discover the size of the stack + kernel32::VirtualQuery( + __stacker_stack_pointer() as *const _, + &mut mi, + std::mem::size_of_val(&mi) as winapi::SIZE_T, + ); + Some(mi.AllocationBase as usize + get_thread_stack_guarantee() + 0x1000) } } else if #[cfg(target_os = "linux")] { use std::mem; From 1f0706c964c31c6847acde821c5634d0bfa00f59 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Thu, 6 Dec 2018 13:01:03 +0100 Subject: [PATCH 13/17] Fill out partial comment --- src/lib.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index 2081476..14ce141 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -279,7 +279,8 @@ cfg_if! { // We are back on the old stack and now we have destroy the fiber and its stack kernel32::DeleteFiber(fiber); - // If the + // If we started out on a non-fiber thread, we converted that thread to a fiber. + // Here we convert back. if !was_fiber { kernel32::ConvertFiberToThread(); } From 0a03ce9b29cb51ba1de46d17452d91958fad56a7 Mon Sep 17 00:00:00 2001 From: Oliver Scherer Date: Mon, 10 Dec 2018 13:29:28 +0100 Subject: [PATCH 14/17] Implement black box inside test file --- src/arch/i686.S | 3 --- src/arch/windows.c | 2 -- src/arch/x86_64.S | 2 -- tests/smoke.rs | 15 +++++++-------- 4 files changed, 7 insertions(+), 15 deletions(-) diff --git a/src/arch/i686.S b/src/arch/i686.S index 8a08708..49c4708 100644 --- a/src/arch/i686.S +++ b/src/arch/i686.S @@ -6,9 +6,6 @@ GLOBAL(__stacker_stack_pointer): mov %esp, %eax ret -GLOBAL(__stacker_black_box): - ret - GLOBAL(__stacker_switch_stacks): // CFI instructions tells the unwinder how to unwind this function // This enables unwinding through our extended stacks and also diff --git a/src/arch/windows.c b/src/arch/windows.c index e1646e4..89485a0 100644 --- a/src/arch/windows.c +++ b/src/arch/windows.c @@ -1,7 +1,5 @@ #include -void __stacker_black_box() {} - PVOID __stacker_get_current_fiber() { return GetCurrentFiber(); } diff --git a/src/arch/x86_64.S b/src/arch/x86_64.S index 1e42d84..598efa1 100644 --- a/src/arch/x86_64.S +++ b/src/arch/x86_64.S @@ -2,8 +2,6 @@ .text -GLOBAL(__stacker_black_box): - ret GLOBAL(__stacker_stack_pointer): movq %rsp, %rax ret diff --git a/tests/smoke.rs b/tests/smoke.rs index 80d12cf..41f41c7 100644 --- a/tests/smoke.rs +++ b/tests/smoke.rs @@ -3,19 +3,18 @@ extern crate stacker; use std::sync::mpsc; use std::thread; -extern { - fn __stacker_black_box(t: *const u8); -} +#[inline(never)] +fn __stacker_black_box(_: *const u8) {} #[test] fn deep() { fn foo(n: usize, s: &mut [u8]) { - unsafe { __stacker_black_box(s.as_ptr()); } + __stacker_black_box(s.as_ptr()); if n > 0 { stacker::maybe_grow(64 * 1024, 1024 * 1024, || { let mut s = [0u8; 1024]; foo(n - 1, &mut s); - unsafe { __stacker_black_box(s.as_ptr()); } + __stacker_black_box(s.as_ptr()); }) } else { println!("bottom"); @@ -28,12 +27,12 @@ fn deep() { #[test] fn panic() { fn foo(n: usize, s: &mut [u8]) { - unsafe { 
__stacker_black_box(s.as_ptr()); } + __stacker_black_box(s.as_ptr()); if n > 0 { stacker::maybe_grow(64 * 1024, 1024 * 1024, || { let mut s = [0u8; 1024]; foo(n - 1, &mut s); - unsafe { __stacker_black_box(s.as_ptr()); } + __stacker_black_box(s.as_ptr()); }) } else { panic!("bottom"); @@ -84,4 +83,4 @@ fn catch_panic_leaf() { let panic_result = std::panic::catch_unwind(|| panic!()); assert!(panic_result.is_err()); }); -} \ No newline at end of file +} From 1d37dc051505ece09d728a2e3c4530166aeec5c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Oliver=20S=CC=B6c=CC=B6h=CC=B6n=CC=B6e=CC=B6i=CC=B6d=CC=B6?= =?UTF-8?q?e=CC=B6r=20Scherer?= Date: Mon, 10 Dec 2018 16:52:52 +0100 Subject: [PATCH 15/17] 32 bit windows uses `_` prefixes for globals --- src/arch/asm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/arch/asm.h b/src/arch/asm.h index e53773e..56c9d28 100644 --- a/src/arch/asm.h +++ b/src/arch/asm.h @@ -1,4 +1,4 @@ -#if defined(APPLE) +#if defined(APPLE) || (defined(WINDOWS) && defined(X86)) #define GLOBAL(name) .globl _ ## name; _ ## name #else #define GLOBAL(name) .globl name; name From 70cbe925c4a422bab12d08b4f63a46a64d5ba2ee Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 10 Dec 2018 13:08:50 -0800 Subject: [PATCH 16/17] Switch to winapi 0.3 --- Cargo.toml | 16 +++++++++++----- src/lib.rs | 36 +++++++++++++++++++++--------------- 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 921e3f4..f6355be 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,12 +19,18 @@ doctest = false test = false [dependencies] -cfg-if = "0.1" -libc = "0.2" +cfg-if = "0.1.6" +libc = "0.2.45" -[target.'cfg(windows)'.dependencies] -kernel32-sys = "0.2.2" -winapi = "0.2.8" +[target.'cfg(windows)'.dependencies.winapi] +version = "0.3.6" +features = [ + 'memoryapi', + 'winbase', + 'fibersapi', + 'processthreadsapi', + 'minwindef', +] [build-dependencies] cc = "1.0" diff --git a/src/lib.rs b/src/lib.rs index 14ce141..6aadad8 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,8 +28,6 @@ extern crate cfg_if; extern crate libc; #[cfg(windows)] -extern crate kernel32; -#[cfg(windows)] extern crate winapi; use std::cell::Cell; @@ -202,17 +200,25 @@ cfg_if! { cfg_if! { if #[cfg(windows)] { + use winapi::shared::basetsd::*; + use winapi::shared::minwindef::{LPVOID, BOOL}; + use winapi::shared::ntdef::*; + use winapi::um::fibersapi::*; + use winapi::um::memoryapi::*; + use winapi::um::processthreadsapi::*; + use winapi::um::winbase::*; + extern { - fn __stacker_get_current_fiber() -> winapi::PVOID; + fn __stacker_get_current_fiber() -> PVOID; } struct FiberInfo<'a> { callback: &'a mut FnMut(), result: Option>, - parent_fiber: winapi::LPVOID, + parent_fiber: LPVOID, } - unsafe extern "system" fn fiber_proc(info: winapi::LPVOID) { + unsafe extern "system" fn fiber_proc(info: LPVOID) { let info = &mut *(info as *mut FiberInfo); // Remember the old stack limit @@ -227,7 +233,7 @@ cfg_if! { // Restore the stack limit of the previous fiber set_stack_limit(old_stack_limit); - kernel32::SwitchToFiber(info.parent_fiber); + SwitchToFiber(info.parent_fiber); return; } @@ -239,7 +245,7 @@ cfg_if! { // `callback` we switch back to the current stack and destroy // the fiber and its associated stack. - let was_fiber = kernel32::IsThreadAFiber() == winapi::TRUE; + let was_fiber = IsThreadAFiber() == TRUE as BOOL; let mut info = FiberInfo { callback, @@ -258,7 +264,7 @@ cfg_if! { // to the current stack. 
Threads coverted to fibers still act like // regular threads, but they have associated fiber data. We later // convert it back to a regular thread and free the fiber data. - kernel32::ConvertThreadToFiber(0i32 as _) + ConvertThreadToFiber(0i32 as _) } }, }; @@ -267,22 +273,22 @@ cfg_if! { panic!("unable to convert thread to fiber"); } - let fiber = kernel32::CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); + let fiber = CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); if fiber == 0i32 as _ { panic!("unable to allocate fiber"); } // Switch to the fiber we created. This changes stacks and starts executing // fiber_proc on it. fiber_proc will run `callback` and then switch back - kernel32::SwitchToFiber(fiber); + SwitchToFiber(fiber); // We are back on the old stack and now we have destroy the fiber and its stack - kernel32::DeleteFiber(fiber); + DeleteFiber(fiber); // If we started out on a non-fiber thread, we converted that thread to a fiber. // Here we convert back. if !was_fiber { - kernel32::ConvertFiberToThread(); + ConvertFiberToThread(); } if let Err(payload) = info.result.unwrap() { @@ -307,7 +313,7 @@ cfg_if! { // some further logic to calculate the real stack // guarantee. This logic is what is used on x86-32 and // x86-64 Windows 10. Other versions and platforms may differ - kernel32::SetThreadStackGuarantee(&mut stack_guarantee) + SetThreadStackGuarantee(&mut stack_guarantee) }; std::cmp::max(stack_guarantee, min_guarantee) as usize + 0x1000 } @@ -317,10 +323,10 @@ cfg_if! { let mut mi = std::mem::zeroed(); // Query the allocation which contains our stack pointer in order // to discover the size of the stack - kernel32::VirtualQuery( + VirtualQuery( __stacker_stack_pointer() as *const _, &mut mi, - std::mem::size_of_val(&mi) as winapi::SIZE_T, + std::mem::size_of_val(&mi) as SIZE_T, ); Some(mi.AllocationBase as usize + get_thread_stack_guarantee() + 0x1000) } From 2c3befad3fc57e53c06286d092429cc5442c2e63 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 10 Dec 2018 13:16:42 -0800 Subject: [PATCH 17/17] Touch up style a bit in fibers implementation --- src/lib.rs | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 6aadad8..acff7b9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -200,6 +200,9 @@ cfg_if! { cfg_if! { if #[cfg(windows)] { + use std::ptr; + use std::io; + use winapi::shared::basetsd::*; use winapi::shared::minwindef::{LPVOID, BOOL}; use winapi::shared::ntdef::*; @@ -264,18 +267,22 @@ cfg_if! { // to the current stack. Threads coverted to fibers still act like // regular threads, but they have associated fiber data. We later // convert it back to a regular thread and free the fiber data. 
- ConvertThreadToFiber(0i32 as _) + ConvertThreadToFiber(ptr::null_mut()) } }, }; - if info.parent_fiber == 0i32 as _ { + if info.parent_fiber.is_null() { // We don't have a handle to the fiber, so we can't switch back - panic!("unable to convert thread to fiber"); + panic!("unable to convert thread to fiber: {}", io::Error::last_os_error()); } - let fiber = CreateFiber(stack_size as _, Some(fiber_proc), &mut info as *mut FiberInfo as *mut _); - if fiber == 0i32 as _ { - panic!("unable to allocate fiber"); + let fiber = CreateFiber( + stack_size as SIZE_T, + Some(fiber_proc), + &mut info as *mut FiberInfo as *mut _, + ); + if fiber.is_null() { + panic!("unable to allocate fiber: {}", io::Error::last_os_error()); } // Switch to the fiber we created. This changes stacks and starts executing @@ -288,7 +295,9 @@ cfg_if! { // If we started out on a non-fiber thread, we converted that thread to a fiber. // Here we convert back. if !was_fiber { - ConvertFiberToThread(); + if ConvertFiberToThread() == 0 { + panic!("unable to convert back to thread: {}", io::Error::last_os_error()); + } } if let Err(payload) = info.result.unwrap() {