From e7a3c341dd517465a326ef693d49f6238789f96b Mon Sep 17 00:00:00 2001 From: niluxv Date: Sat, 5 Aug 2023 20:20:41 +0200 Subject: [PATCH 1/2] Use pointers instead of `usize` addresses for landing pads This bring unwind and personality code more in line with strict-provenance --- library/panic_unwind/src/gcc.rs | 2 +- library/std/src/sys/personality/dwarf/eh.rs | 66 ++++++++++++--------- library/std/src/sys/personality/gcc.rs | 9 ++- library/unwind/src/lib.rs | 1 + library/unwind/src/libunwind.rs | 14 ++--- 5 files changed, 50 insertions(+), 42 deletions(-) diff --git a/library/panic_unwind/src/gcc.rs b/library/panic_unwind/src/gcc.rs index 08858dd92be09..54eb6627c0129 100644 --- a/library/panic_unwind/src/gcc.rs +++ b/library/panic_unwind/src/gcc.rs @@ -63,7 +63,7 @@ pub unsafe fn panic(data: Box) -> u32 { _uwe: uw::_Unwind_Exception { exception_class: rust_exception_class(), exception_cleanup, - private: [0; uw::unwinder_private_data_size], + private: [core::ptr::null(); uw::unwinder_private_data_size], }, canary: &CANARY, cause: data, diff --git a/library/std/src/sys/personality/dwarf/eh.rs b/library/std/src/sys/personality/dwarf/eh.rs index 79624703a4cf7..dd0a76100e239 100644 --- a/library/std/src/sys/personality/dwarf/eh.rs +++ b/library/std/src/sys/personality/dwarf/eh.rs @@ -1,6 +1,7 @@ //! Parsing of GCC-style Language-Specific Data Area (LSDA) //! For details see: //! * +//! * //! * //! * //! * @@ -37,17 +38,19 @@ pub const DW_EH_PE_indirect: u8 = 0x80; #[derive(Copy, Clone)] pub struct EHContext<'a> { - pub ip: usize, // Current instruction pointer - pub func_start: usize, // Address of the current function - pub get_text_start: &'a dyn Fn() -> usize, // Get address of the code section - pub get_data_start: &'a dyn Fn() -> usize, // Get address of the data section + pub ip: *const u8, // Current instruction pointer + pub func_start: *const u8, // Pointer to the current function + pub get_text_start: &'a dyn Fn() -> *const u8, // Get pointer to the code section + pub get_data_start: &'a dyn Fn() -> *const u8, // Get pointer to the data section } +/// Landing pad. +type LPad = *const u8; pub enum EHAction { None, - Cleanup(usize), - Catch(usize), - Filter(usize), + Cleanup(LPad), + Catch(LPad), + Filter(LPad), Terminate, } @@ -82,21 +85,21 @@ pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result if !USING_SJLJ_EXCEPTIONS { while reader.ptr < action_table { - let cs_start = read_encoded_pointer(&mut reader, context, call_site_encoding)?; - let cs_len = read_encoded_pointer(&mut reader, context, call_site_encoding)?; - let cs_lpad = read_encoded_pointer(&mut reader, context, call_site_encoding)?; + let cs_start = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); + let cs_len = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); + let cs_lpad = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); let cs_action_entry = reader.read_uleb128(); // Callsite table is sorted by cs_start, so if we've passed the ip, we // may stop searching. - if ip < func_start + cs_start { + if ip < func_start.wrapping_add(cs_start) { break; } - if ip < func_start + cs_start + cs_len { + if ip < func_start.wrapping_add(cs_start + cs_len) { if cs_lpad == 0 { return Ok(EHAction::None); } else { - let lpad = lpad_base + cs_lpad; - return Ok(interpret_cs_action(action_table as *mut u8, cs_action_entry, lpad)); + let lpad = lpad_base.wrapping_add(cs_lpad); + return Ok(interpret_cs_action(action_table, cs_action_entry, lpad)); } } } @@ -106,12 +109,12 @@ pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result // SjLj version: // The "IP" is an index into the call-site table, with two exceptions: // -1 means 'no-action', and 0 means 'terminate'. - match ip as isize { + match ip.addr() as isize { -1 => return Ok(EHAction::None), 0 => return Ok(EHAction::Terminate), _ => (), } - let mut idx = ip; + let mut idx = ip.addr(); loop { let cs_lpad = reader.read_uleb128(); let cs_action_entry = reader.read_uleb128(); @@ -119,17 +122,18 @@ pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result if idx == 0 { // Can never have null landing pad for sjlj -- that would have // been indicated by a -1 call site index. - let lpad = (cs_lpad + 1) as usize; - return Ok(interpret_cs_action(action_table as *mut u8, cs_action_entry, lpad)); + // FIXME(strict provenance) + let lpad = ptr::from_exposed_addr((cs_lpad + 1) as usize); + return Ok(interpret_cs_action(action_table, cs_action_entry, lpad)); } } } } unsafe fn interpret_cs_action( - action_table: *mut u8, + action_table: *const u8, cs_action_entry: u64, - lpad: usize, + lpad: LPad, ) -> EHAction { if cs_action_entry == 0 { // If cs_action_entry is 0 then this is a cleanup (Drop::drop). We run these @@ -138,7 +142,7 @@ unsafe fn interpret_cs_action( } else { // If lpad != 0 and cs_action_entry != 0, we have to check ttype_index. // If ttype_index == 0 under the condition, we take cleanup action. - let action_record = (action_table as *mut u8).offset(cs_action_entry as isize - 1); + let action_record = action_table.offset(cs_action_entry as isize - 1); let mut action_reader = DwarfReader::new(action_record); let ttype_index = action_reader.read_sleb128(); if ttype_index == 0 { @@ -161,15 +165,16 @@ unsafe fn read_encoded_pointer( reader: &mut DwarfReader, context: &EHContext<'_>, encoding: u8, -) -> Result { +) -> Result<*const u8, ()> { if encoding == DW_EH_PE_omit { return Err(()); } // DW_EH_PE_aligned implies it's an absolute pointer value if encoding == DW_EH_PE_aligned { - reader.ptr = reader.ptr.with_addr(round_up(reader.ptr.addr(), mem::size_of::())?); - return Ok(reader.read::()); + reader.ptr = + reader.ptr.with_addr(round_up(reader.ptr.addr(), mem::size_of::<*const u8>())?); + return Ok(reader.read::<*const u8>()); } let mut result = match encoding & 0x0F { @@ -190,18 +195,21 @@ unsafe fn read_encoded_pointer( // relative to address of the encoded value, despite the name DW_EH_PE_pcrel => reader.ptr.expose_addr(), DW_EH_PE_funcrel => { - if context.func_start == 0 { + if context.func_start.is_null() { return Err(()); } - context.func_start + context.func_start.expose_addr() } - DW_EH_PE_textrel => (*context.get_text_start)(), - DW_EH_PE_datarel => (*context.get_data_start)(), + DW_EH_PE_textrel => (*context.get_text_start)().expose_addr(), + DW_EH_PE_datarel => (*context.get_data_start)().expose_addr(), _ => return Err(()), }; + // FIXME(strict provenance) + let mut result: *const u8 = ptr::from_exposed_addr::(result); + if encoding & DW_EH_PE_indirect != 0 { - result = *ptr::from_exposed_addr::(result); + result = *(result.cast::<*const u8>()); } Ok(result) diff --git a/library/std/src/sys/personality/gcc.rs b/library/std/src/sys/personality/gcc.rs index 559d2c7db4717..6f317131145ae 100644 --- a/library/std/src/sys/personality/gcc.rs +++ b/library/std/src/sys/personality/gcc.rs @@ -38,7 +38,6 @@ use super::dwarf::eh::{self, EHAction, EHContext}; use crate::ffi::c_int; -use libc::uintptr_t; use unwind as uw; // Register ids were lifted from LLVM's TargetLowering::getExceptionPointerRegister() @@ -160,9 +159,9 @@ cfg_if::cfg_if! { uw::_Unwind_SetGR( context, UNWIND_DATA_REG.0, - exception_object as uintptr_t, + exception_object as uw::_Unwind_Ptr, ); - uw::_Unwind_SetGR(context, UNWIND_DATA_REG.1, 0); + uw::_Unwind_SetGR(context, UNWIND_DATA_REG.1, core::ptr::null()); uw::_Unwind_SetIP(context, lpad); return uw::_URC_INSTALL_CONTEXT; } @@ -222,9 +221,9 @@ cfg_if::cfg_if! { uw::_Unwind_SetGR( context, UNWIND_DATA_REG.0, - exception_object as uintptr_t, + exception_object.cast(), ); - uw::_Unwind_SetGR(context, UNWIND_DATA_REG.1, 0); + uw::_Unwind_SetGR(context, UNWIND_DATA_REG.1, core::ptr::null()); uw::_Unwind_SetIP(context, lpad); uw::_URC_INSTALL_CONTEXT } diff --git a/library/unwind/src/lib.rs b/library/unwind/src/lib.rs index 94a91dd357c9f..e86408a9ed2bd 100644 --- a/library/unwind/src/lib.rs +++ b/library/unwind/src/lib.rs @@ -4,6 +4,7 @@ #![feature(staged_api)] #![feature(c_unwind)] #![feature(cfg_target_abi)] +#![feature(strict_provenance)] #![cfg_attr(not(target_env = "msvc"), feature(libc))] #![allow(internal_features)] diff --git a/library/unwind/src/libunwind.rs b/library/unwind/src/libunwind.rs index a2bfa8e96dd22..dba64aa74ce60 100644 --- a/library/unwind/src/libunwind.rs +++ b/library/unwind/src/libunwind.rs @@ -1,6 +1,6 @@ #![allow(nonstandard_style)] -use libc::{c_int, c_void, uintptr_t}; +use libc::{c_int, c_void}; #[repr(C)] #[derive(Debug, Copy, Clone, PartialEq)] @@ -19,8 +19,8 @@ pub enum _Unwind_Reason_Code { pub use _Unwind_Reason_Code::*; pub type _Unwind_Exception_Class = u64; -pub type _Unwind_Word = uintptr_t; -pub type _Unwind_Ptr = uintptr_t; +pub type _Unwind_Word = *const u8; +pub type _Unwind_Ptr = *const u8; pub type _Unwind_Trace_Fn = extern "C" fn(ctx: *mut _Unwind_Context, arg: *mut c_void) -> _Unwind_Reason_Code; @@ -214,7 +214,7 @@ if #[cfg(any(target_os = "ios", target_os = "tvos", target_os = "watchos", targe // On Android or ARM/Linux, these are implemented as macros: pub unsafe fn _Unwind_GetGR(ctx: *mut _Unwind_Context, reg_index: c_int) -> _Unwind_Word { - let mut val: _Unwind_Word = 0; + let mut val: _Unwind_Word = core::ptr::null(); _Unwind_VRS_Get(ctx, _UVRSC_CORE, reg_index as _Unwind_Word, _UVRSD_UINT32, &mut val as *mut _ as *mut c_void); val @@ -229,14 +229,14 @@ if #[cfg(any(target_os = "ios", target_os = "tvos", target_os = "watchos", targe pub unsafe fn _Unwind_GetIP(ctx: *mut _Unwind_Context) -> _Unwind_Word { let val = _Unwind_GetGR(ctx, UNWIND_IP_REG); - (val & !1) as _Unwind_Word + val.map_addr(|v| v & !1) } pub unsafe fn _Unwind_SetIP(ctx: *mut _Unwind_Context, value: _Unwind_Word) { // Propagate thumb bit to instruction pointer - let thumb_state = _Unwind_GetGR(ctx, UNWIND_IP_REG) & 1; - let value = value | thumb_state; + let thumb_state = _Unwind_GetGR(ctx, UNWIND_IP_REG).addr() & 1; + let value = value.map_addr(|v| v | thumb_state); _Unwind_SetGR(ctx, UNWIND_IP_REG, value); } From b48039f6feec314842d40236f33d6a1c5b66b3da Mon Sep 17 00:00:00 2001 From: niluxv Date: Sun, 6 Aug 2023 11:40:34 +0200 Subject: [PATCH 2/2] Rewrite `read_encoded_pointer` conforming to strict provenance * Entries in the callsite table now use a dedicated function for reading an offset rather than a pointer * `read_encoded_pointer` uses that new function for reading offsets when the "application" part of the encoding indicates an offset (relative to some pointer) * It now errors out on nonsensical "application" and "value encoding" combinations Inspired by @eddyb's comment on zulip about this: --- library/std/src/sys/personality/dwarf/eh.rs | 97 +++++++++++++++------ 1 file changed, 69 insertions(+), 28 deletions(-) diff --git a/library/std/src/sys/personality/dwarf/eh.rs b/library/std/src/sys/personality/dwarf/eh.rs index dd0a76100e239..a78084de0faef 100644 --- a/library/std/src/sys/personality/dwarf/eh.rs +++ b/library/std/src/sys/personality/dwarf/eh.rs @@ -84,10 +84,12 @@ pub unsafe fn find_eh_action(lsda: *const u8, context: &EHContext<'_>) -> Result let ip = context.ip; if !USING_SJLJ_EXCEPTIONS { + // read the callsite table while reader.ptr < action_table { - let cs_start = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); - let cs_len = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); - let cs_lpad = read_encoded_pointer(&mut reader, context, call_site_encoding)?.addr(); + // these are offsets rather than pointers; + let cs_start = read_encoded_offset(&mut reader, call_site_encoding)?; + let cs_len = read_encoded_offset(&mut reader, call_site_encoding)?; + let cs_lpad = read_encoded_offset(&mut reader, call_site_encoding)?; let cs_action_entry = reader.read_uleb128(); // Callsite table is sorted by cs_start, so if we've passed the ip, we // may stop searching. @@ -161,23 +163,24 @@ fn round_up(unrounded: usize, align: usize) -> Result { if align.is_power_of_two() { Ok((unrounded + align - 1) & !(align - 1)) } else { Err(()) } } -unsafe fn read_encoded_pointer( - reader: &mut DwarfReader, - context: &EHContext<'_>, - encoding: u8, -) -> Result<*const u8, ()> { - if encoding == DW_EH_PE_omit { +/// Read a offset (`usize`) from `reader` whose encoding is described by `encoding`. +/// +/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext]. +/// In addition the upper ("application") part must be zero. +/// +/// # Errors +/// Returns `Err` if `encoding` +/// * is not a valid DWARF Exception Header Encoding, +/// * is `DW_EH_PE_omit`, or +/// * has a non-zero application part. +/// +/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html +unsafe fn read_encoded_offset(reader: &mut DwarfReader, encoding: u8) -> Result { + if encoding == DW_EH_PE_omit || encoding & 0xF0 != 0 { return Err(()); } - - // DW_EH_PE_aligned implies it's an absolute pointer value - if encoding == DW_EH_PE_aligned { - reader.ptr = - reader.ptr.with_addr(round_up(reader.ptr.addr(), mem::size_of::<*const u8>())?); - return Ok(reader.read::<*const u8>()); - } - - let mut result = match encoding & 0x0F { + let result = match encoding & 0x0F { + // despite the name, LLVM also uses absptr for offsets instead of pointers DW_EH_PE_absptr => reader.read::(), DW_EH_PE_uleb128 => reader.read_uleb128() as usize, DW_EH_PE_udata2 => reader.read::() as usize, @@ -189,28 +192,66 @@ unsafe fn read_encoded_pointer( DW_EH_PE_sdata8 => reader.read::() as usize, _ => return Err(()), }; + Ok(result) +} - result += match encoding & 0x70 { - DW_EH_PE_absptr => 0, +/// Read a pointer from `reader` whose encoding is described by `encoding`. +/// +/// `encoding` must be a [DWARF Exception Header Encoding as described by the LSB spec][LSB-dwarf-ext]. +/// +/// # Errors +/// Returns `Err` if `encoding` +/// * is not a valid DWARF Exception Header Encoding, +/// * is `DW_EH_PE_omit`, or +/// * combines `DW_EH_PE_absptr` or `DW_EH_PE_aligned` application part with an integer encoding +/// (not `DW_EH_PE_absptr`) in the value format part. +/// +/// [LSB-dwarf-ext]: https://refspecs.linuxfoundation.org/LSB_5.0.0/LSB-Core-generic/LSB-Core-generic/dwarfext.html +unsafe fn read_encoded_pointer( + reader: &mut DwarfReader, + context: &EHContext<'_>, + encoding: u8, +) -> Result<*const u8, ()> { + if encoding == DW_EH_PE_omit { + return Err(()); + } + + let base_ptr = match encoding & 0x70 { + DW_EH_PE_absptr => core::ptr::null(), // relative to address of the encoded value, despite the name - DW_EH_PE_pcrel => reader.ptr.expose_addr(), + DW_EH_PE_pcrel => reader.ptr, DW_EH_PE_funcrel => { if context.func_start.is_null() { return Err(()); } - context.func_start.expose_addr() + context.func_start + } + DW_EH_PE_textrel => (*context.get_text_start)(), + DW_EH_PE_datarel => (*context.get_data_start)(), + // aligned means the value is aligned to the size of a pointer + DW_EH_PE_aligned => { + reader.ptr = + reader.ptr.with_addr(round_up(reader.ptr.addr(), mem::size_of::<*const u8>())?); + core::ptr::null() } - DW_EH_PE_textrel => (*context.get_text_start)().expose_addr(), - DW_EH_PE_datarel => (*context.get_data_start)().expose_addr(), _ => return Err(()), }; - // FIXME(strict provenance) - let mut result: *const u8 = ptr::from_exposed_addr::(result); + let mut ptr = if base_ptr.is_null() { + // any value encoding other than absptr would be nonsensical here; + // there would be no source of pointer provenance + if encoding & 0x0F != DW_EH_PE_absptr { + return Err(()); + } + reader.read::<*const u8>() + } else { + let offset = read_encoded_offset(reader, encoding & 0x0F)?; + base_ptr.wrapping_add(offset) + }; if encoding & DW_EH_PE_indirect != 0 { - result = *(result.cast::<*const u8>()); + ptr = *(ptr.cast::<*const u8>()); } - Ok(result) + Ok(ptr) }