From 1c5b2399f509d0e88f101f2caeb1b645784c8423 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Tue, 30 Jan 2024 13:11:33 -0500 Subject: [PATCH 01/16] Enable crashtracker metadata to be modified at runtime --- crashtracker/src/api.rs | 15 +++++++++++- crashtracker/src/constants.rs | 8 ++++--- crashtracker/src/crash_handler.rs | 40 +++++++++++++++++++++++++++---- crashtracker/src/crash_info.rs | 19 ++++++++++----- crashtracker/src/lib.rs | 1 + crashtracker/src/receiver.rs | 21 ++++++++++------ profiling-ffi/src/crashtracker.rs | 22 ++++++++++++++++- 7 files changed, 103 insertions(+), 23 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 0102d96dc..41096c20d 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -2,6 +2,8 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. #![cfg(unix)] +use crate::update_metadata; + use super::{ counters::reset_counters, crash_handler::{ @@ -169,6 +171,7 @@ pub fn init( // ./build-profiling-ffi /tmp/libdatadog // mkdir /tmp/crashreports // look in /tmp/crashreports for the crash reports and output files +// Commented out since `ignore` doesn't work in CI. //#[test] fn test_crash() { use crate::begin_profiling_op; @@ -187,7 +190,7 @@ fn test_crash() { let path_to_receiver_binary = "/tmp/libdatadog/bin/libdatadog-crashtracking-receiver".to_string(); let create_alt_stack = true; - let resolve_frames = CrashtrackerResolveFrames::ExperimentalInReceiver; + let resolve_frames = CrashtrackerResolveFrames::Never; let stderr_filename = Some(format!("{dir}/stderr_{time}.txt")); let stdout_filename = Some(format!("{dir}/stdout_{time}.txt")); @@ -208,6 +211,16 @@ fn test_crash() { ); init(config, metadata).expect("not to fail"); begin_profiling_op(crate::ProfilingOpTypes::CollectingSample).expect("Not to fail"); + + let tag = Tag::new("apple", "banana").expect("tag"); + let metadata2 = CrashtrackerMetadata::new( + "libname".to_string(), + "version".to_string(), + "family".to_string(), + vec![tag], + ); + update_metadata(&metadata2).expect("metadata"); + let p: *const u32 = std::ptr::null(); let q = unsafe { *p }; assert_eq!(q, 3); diff --git a/crashtracker/src/constants.rs b/crashtracker/src/constants.rs index 1811ae557..b7ab4257e 100644 --- a/crashtracker/src/constants.rs +++ b/crashtracker/src/constants.rs @@ -1,11 +1,13 @@ // Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. +pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; pub const DD_CRASHTRACK_BEGIN_FILE: &str = "DD_CRASHTRACK_BEGIN_FILE"; +pub const DD_CRASHTRACK_BEGIN_METADATA: &str = "DD_CRASHTRACK_BEGIN_METADATA"; pub const DD_CRASHTRACK_BEGIN_SIGINFO: &str = "DD_CRASHTRACK_BEGIN_SIGINFO"; pub const DD_CRASHTRACK_BEGIN_STACKTRACE: &str = "DD_CRASHTRACK_BEGIN_STACKTRACE"; +pub const DD_CRASHTRACK_END_COUNTERS: &str = "DD_CRASHTRACK_END_COUNTERS"; pub const DD_CRASHTRACK_END_FILE: &str = "DD_CRASHTRACK_END_FILE"; -pub const DD_CRASHTRACK_END_STACKTRACE: &str = "DD_CRASHTRACK_END_STACKTRACE"; +pub const DD_CRASHTRACK_END_METADATA: &str = "DD_CRASHTRACK_END_METADATA"; pub const DD_CRASHTRACK_END_SIGINFO: &str = "DD_CRASHTRACK_END_SIGINFO"; -pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; -pub const DD_CRASHTRACK_END_COUNTERS: &str = "DD_CRASHTRACK_END_COUNTERS"; +pub const DD_CRASHTRACK_END_STACKTRACE: &str = "DD_CRASHTRACK_END_STACKTRACE"; diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index af106d86f..8e9a7241e 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -37,6 +37,7 @@ static mut RECEIVER: GlobalVarState = GlobalVarState::Unass static mut OLD_SIGBUS_HANDLER: GlobalVarState = GlobalVarState::Unassigned; static mut OLD_SIGSEGV_HANDLER: GlobalVarState = GlobalVarState::Unassigned; static mut RESOLVE_FRAMES: bool = false; +static mut METADATA_STRING: Option = None; fn make_receiver( config: &CrashtrackerConfiguration, @@ -73,14 +74,29 @@ fn make_receiver( "{}", serde_json::to_string(&config)? )?; - writeln!( - receiver.stdin.as_ref().unwrap(), - "{}", - serde_json::to_string(&metadata)? - )?; + + update_metadata(metadata)?; Ok(receiver) } +/// Updates the crashtracker metadata for this process +/// Metadata is stored in a global variable and sent to the crashtracking +/// receiver when a crash occurs. +/// +/// PRECONDITIONS: +/// None +/// SAFETY: +/// Crash-tracking functions are not reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function is not atomic. A crash during its execution may lead to +/// unexpected crash-handling behaviour. +pub fn update_metadata(metadata: &CrashtrackerMetadata) -> anyhow::Result<()> { + let metadata_string = serde_json::to_string(&metadata)?; + unsafe { METADATA_STRING = Some(metadata_string) }; + Ok(()) +} + pub fn setup_receiver( config: &CrashtrackerConfiguration, metadata: &CrashtrackerMetadata, @@ -142,6 +158,17 @@ extern "C" fn handle_posix_signal(signum: i32) { // return to old handler (chain). See comments on `restore_old_handler`. } +fn emit_metadata(w: &mut impl Write) -> anyhow::Result<()> { + writeln!(w, "{DD_CRASHTRACK_BEGIN_METADATA}")?; + + let metadata = unsafe { &METADATA_STRING.as_ref().context("Expected metadata")? }; + writeln!(w, "{}", metadata)?; + + writeln!(w, "{DD_CRASHTRACK_END_METADATA}")?; + + Ok(()) +} + fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { let mut receiver = match std::mem::replace(unsafe { &mut RECEIVER }, GlobalVarState::Taken) { GlobalVarState::Some(r) => r, @@ -158,6 +185,9 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { }; let pipe = receiver.stdin.as_mut().unwrap(); + + emit_metadata(pipe)?; + writeln!(pipe, "{DD_CRASHTRACK_BEGIN_SIGINFO}")?; writeln!(pipe, "{{\"signum\": {signum}, \"signame\": \"{signame}\"}}")?; writeln!(pipe, "{DD_CRASHTRACK_END_SIGINFO}")?; diff --git a/crashtracker/src/crash_info.rs b/crashtracker/src/crash_info.rs index 9c80f7589..25ab98bfd 100644 --- a/crashtracker/src/crash_info.rs +++ b/crashtracker/src/crash_info.rs @@ -39,7 +39,7 @@ pub struct CrashInfo { additional_stacktraces: HashMap>, counters: HashMap, files: HashMap>, - metadata: CrashtrackerMetadata, + metadata: Option, os_info: os_info::Info, siginfo: Option, stacktrace: Vec, @@ -54,22 +54,24 @@ impl CrashInfo { pub fn crash_seen(&self) -> bool { self.siginfo.is_some() } +} - pub fn get_metadata(&self) -> &CrashtrackerMetadata { - &self.metadata +impl Default for CrashInfo { + fn default() -> Self { + Self::new() } } /// Constructor and setters impl CrashInfo { - pub fn new(metadata: CrashtrackerMetadata) -> Self { + pub fn new() -> Self { let os_info = os_info::get(); let uuid = Uuid::new_v4(); Self { additional_stacktraces: HashMap::new(), counters: HashMap::new(), files: HashMap::new(), - metadata, + metadata: None, os_info, siginfo: None, stacktrace: vec![], @@ -105,6 +107,11 @@ impl CrashInfo { Ok(()) } + pub fn set_metadata(&mut self, metadata: CrashtrackerMetadata) -> anyhow::Result<()> { + anyhow::ensure!(self.metadata.is_none()); + self.metadata = Some(metadata); + Ok(()) + } pub fn set_siginfo(&mut self, siginfo: SigInfo) -> anyhow::Result<()> { anyhow::ensure!(self.siginfo.is_none()); self.siginfo = Some(siginfo); @@ -156,7 +163,7 @@ impl CrashInfo { //let site = "datad0g.com"; //let api_key = std::env::var("DD_API_KEY")?; let data = serde_json::to_vec(self)?; - let metadata = self.get_metadata(); + let metadata = &self.metadata.as_ref().context("Missing metadata")?; let is_crash_tag = make_tag("is_crash", "yes")?; let tags = Some( diff --git a/crashtracker/src/lib.rs b/crashtracker/src/lib.rs index 8cb31076e..1782d1cdc 100644 --- a/crashtracker/src/lib.rs +++ b/crashtracker/src/lib.rs @@ -60,5 +60,6 @@ mod receiver; pub use api::*; pub use constants::*; pub use counters::{begin_profiling_op, end_profiling_op, ProfilingOpTypes}; +pub use crash_handler::update_metadata; pub use crash_info::*; pub use receiver::receiver_entry_point; diff --git a/crashtracker/src/receiver.rs b/crashtracker/src/receiver.rs index dc9b3de26..b9bb1fe1e 100644 --- a/crashtracker/src/receiver.rs +++ b/crashtracker/src/receiver.rs @@ -19,11 +19,7 @@ pub fn receiver_entry_point() -> anyhow::Result<()> { std::io::stdin().lock().read_line(&mut config)?; let config: CrashtrackerConfiguration = serde_json::from_str(&config)?; - let mut metadata = String::new(); - std::io::stdin().lock().read_line(&mut metadata)?; - let metadata: CrashtrackerMetadata = serde_json::from_str(&metadata)?; - - match receive_report(&metadata)? { + match receive_report()? { CrashReportStatus::NoCrash => Ok(()), CrashReportStatus::CrashReport(crash_info) => { if config.resolve_frames == CrashtrackerResolveFrames::ExperimentalInReceiver { @@ -48,6 +44,7 @@ pub fn receiver_entry_point() -> anyhow::Result<()> { enum StdinState { Counters, File(String, Vec), + Metadata, SigInfo, StackTrace(Vec), Waiting, @@ -86,6 +83,12 @@ fn process_line( StdinState::File(name, contents) } + StdinState::Metadata if line.starts_with(DD_CRASHTRACK_END_METADATA) => StdinState::Waiting, + StdinState::Metadata => { + let metadata = serde_json::from_str(&line)?; + crashinfo.set_metadata(metadata)?; + StdinState::Metadata + } StdinState::SigInfo if line.starts_with(DD_CRASHTRACK_END_SIGINFO) => StdinState::Waiting, StdinState::SigInfo => { let siginfo = serde_json::from_str(&line)?; @@ -111,6 +114,9 @@ fn process_line( let (_, filename) = line.split_once(' ').unwrap_or(("", "MISSING_FILENAME")); StdinState::File(filename.to_string(), vec![]) } + StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_METADATA) => { + StdinState::Metadata + } StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_SIGINFO) => StdinState::SigInfo, StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_STACKTRACE) => { StdinState::StackTrace(vec![]) @@ -137,8 +143,9 @@ enum CrashReportStatus { /// In the case where the parent failed to transfer a full crash-report /// (for instance if it crashed while calculating the crash-report), we return /// a PartialCrashReport. -fn receive_report(metadata: &CrashtrackerMetadata) -> anyhow::Result { - let mut crashinfo = CrashInfo::new(metadata.clone()); +// TODO: Make this take a buffer, rather than assuming input on stdin +fn receive_report() -> anyhow::Result { + let mut crashinfo = CrashInfo::new(); let mut stdin_state = StdinState::Waiting; //TODO: This assumes that the input is valid UTF-8. for line in std::io::stdin().lock().lines() { diff --git a/profiling-ffi/src/crashtracker.rs b/profiling-ffi/src/crashtracker.rs index 0872ae5d7..fc0e80a84 100644 --- a/profiling-ffi/src/crashtracker.rs +++ b/profiling-ffi/src/crashtracker.rs @@ -121,6 +121,26 @@ pub unsafe extern "C" fn ddog_prof_crashtracker_shutdown() -> ProfileResult { } } +#[no_mangle] +#[must_use] +pub unsafe extern "C" fn ddog_prof_crashtracker_update_metadata( + metadata: CrashtrackerMetadata, +) -> ProfileResult { + match ddog_prof_crashtracker_update_metadata_impl(metadata) { + Ok(_) => ProfileResult::Ok(true), + Err(err) => ProfileResult::Err(Error::from( + err.context("ddog_prof_crashtracker_update_metadata failed"), + )), + } +} + +unsafe fn ddog_prof_crashtracker_update_metadata_impl( + metadata: CrashtrackerMetadata, +) -> anyhow::Result<()> { + let metadata = metadata.try_into()?; + datadog_crashtracker::update_metadata(&metadata) +} + #[no_mangle] #[must_use] pub unsafe extern "C" fn ddog_prof_crashtracker_update_on_fork( @@ -130,7 +150,7 @@ pub unsafe extern "C" fn ddog_prof_crashtracker_update_on_fork( match ddog_prof_crashtracker_update_on_fork_impl(config, metadata) { Ok(_) => ProfileResult::Ok(true), Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_init failed"), + err.context("ddog_prof_crashtracker_update_on_fork failed"), )), } } From c19efc099d2524dede7d85882ae57dd9271a36d3 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Mon, 12 Feb 2024 19:04:46 -0500 Subject: [PATCH 02/16] use atomic for RECEIVER --- crashtracker/src/crash_handler.rs | 82 +++++++++++++++---------------- 1 file changed, 39 insertions(+), 43 deletions(-) diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index baa9bce33..5bee9cdef 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -3,14 +3,10 @@ #![cfg(unix)] -use std::io::Write; -use std::ptr; - -#[cfg(target_os = "linux")] -use super::collectors::emit_proc_self_maps; - use super::api::{CrashtrackerConfiguration, CrashtrackerMetadata, CrashtrackerResolveFrames}; use super::collectors::emit_backtrace_by_frames; +#[cfg(target_os = "linux")] +use super::collectors::emit_proc_self_maps; use super::constants::*; use super::counters::emit_counters; use anyhow::Context; @@ -21,7 +17,11 @@ use libc::{ use nix::sys::signal; use nix::sys::signal::{SaFlags, SigAction, SigHandler}; use std::fs::File; +use std::io::Write; use std::process::{Command, Stdio}; +use std::ptr; +use std::sync::atomic::AtomicPtr; +use std::sync::atomic::Ordering::SeqCst; #[derive(Debug)] enum GlobalVarState @@ -33,7 +33,7 @@ where Taken, } -static mut RECEIVER: GlobalVarState = GlobalVarState::Unassigned; +static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static mut OLD_SIGBUS_HANDLER: GlobalVarState = GlobalVarState::Unassigned; static mut OLD_SIGSEGV_HANDLER: GlobalVarState = GlobalVarState::Unassigned; static mut RESOLVE_FRAMES: bool = false; @@ -101,50 +101,47 @@ pub fn setup_receiver( config: &CrashtrackerConfiguration, metadata: &CrashtrackerMetadata, ) -> anyhow::Result<()> { - let new_receiver = make_receiver(config, metadata)?; - let old_receiver = - unsafe { std::mem::replace(&mut RECEIVER, GlobalVarState::Some(new_receiver)) }; + let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); + let old_receiver = RECEIVER.swap(new_receiver, SeqCst); anyhow::ensure!( - matches!(old_receiver, GlobalVarState::Unassigned), - "Error registering crash handler receiver: receiver already existed {old_receiver:?}" + old_receiver.is_null(), + "Error registering crash handler receiver: receiver already existed" ); - Ok(()) } + pub fn replace_receiver( config: &CrashtrackerConfiguration, metadata: &CrashtrackerMetadata, ) -> anyhow::Result<()> { - let new_receiver = make_receiver(config, metadata)?; - let old_receiver = - unsafe { std::mem::replace(&mut RECEIVER, GlobalVarState::Some(new_receiver)) }; - if let GlobalVarState::Some(mut old_receiver) = old_receiver { - // Close the stdin handle so we don't have two open copies - // TODO: dropping the old receiver at the end of this function might do this automatically? - drop(old_receiver.stdin.take()); - drop(old_receiver.stdout.take()); - drop(old_receiver.stderr.take()); - // Leave the old one running, since its being used by another fork - } else { - anyhow::bail!( - "Error updating crash handler receiver: receiver did not already exist {old_receiver:?}" - ); - } + let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); + let old_receiver: *mut std::process::Child = RECEIVER.swap(new_receiver, SeqCst); + anyhow::ensure!( + !old_receiver.is_null(), + "Error updating crash handler receiver: receiver did not already exist" + ); + let mut old_receiver: Box = unsafe { Box::from_raw(old_receiver) }; + // Close the stdin handle so we don't have two open copies + // TODO: dropping the old receiver at the end of this function might do this automatically? + drop(old_receiver.stdin.take()); + drop(old_receiver.stdout.take()); + drop(old_receiver.stderr.take()); + // Leave the old one running, since its being used by another fork Ok(()) } pub fn shutdown_receiver() -> anyhow::Result<()> { - let old_receiver = unsafe { std::mem::replace(&mut RECEIVER, GlobalVarState::Taken) }; - if let GlobalVarState::Some(mut old_receiver) = old_receiver { - old_receiver.kill()?; - old_receiver.wait()?; - } else { - anyhow::bail!( - "Error shutting down crash handler receiver: receiver did not already exist {old_receiver:?}" - ); - } + let old_receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!( + !old_receiver.is_null(), + "Error shutting down crash handler receiver: receiver did not already exist" + ); + let mut old_receiver = unsafe { Box::from_raw(old_receiver) }; + + old_receiver.kill()?; + old_receiver.wait()?; Ok(()) } @@ -170,11 +167,9 @@ fn emit_metadata(w: &mut impl Write) -> anyhow::Result<()> { } fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { - let mut receiver = match std::mem::replace(unsafe { &mut RECEIVER }, GlobalVarState::Taken) { - GlobalVarState::Some(r) => r, - GlobalVarState::Unassigned => anyhow::bail!("Cannot acquire receiver: Unassigned"), - GlobalVarState::Taken => anyhow::bail!("Cannot acquire receiver: Taken"), - }; + let receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!receiver.is_null(), "No crashtracking receiver"); + let receiver = unsafe { receiver.as_mut().context("")? }; let signame = if signum == libc::SIGSEGV { "SIGSEGV" @@ -211,8 +206,9 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { // The stdin handle to the child process, if any, will be closed before waiting. // This helps avoid deadlock: it ensures that the child does not block waiting // for input from the parent, while the parent waits for the child to exit. - //TODO, use a polling mechanism that could recover from a crashing child + // TODO, use a polling mechanism that could recover from a crashing child receiver.wait()?; + // Calling "free" in a signal handler is dangerous, so don't do that. Ok(()) } From f3b5d2520f7f70366a31c020ac4faa41b9aab09c Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Wed, 14 Feb 2024 17:45:26 -0500 Subject: [PATCH 03/16] use atomic ptr for the old actions --- crashtracker/src/api.rs | 5 +-- crashtracker/src/crash_handler.rs | 75 ++++++++++--------------------- 2 files changed, 25 insertions(+), 55 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 41096c20d..f9d8cb79e 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -2,8 +2,6 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. #![cfg(unix)] -use crate::update_metadata; - use super::{ counters::reset_counters, crash_handler::{ @@ -172,9 +170,10 @@ pub fn init( // mkdir /tmp/crashreports // look in /tmp/crashreports for the crash reports and output files // Commented out since `ignore` doesn't work in CI. -//#[test] +#[test] fn test_crash() { use crate::begin_profiling_op; + use crate::update_metadata; use chrono::Utc; use ddcommon::parse_uri; diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index 5bee9cdef..c4383d275 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -24,18 +24,14 @@ use std::sync::atomic::AtomicPtr; use std::sync::atomic::Ordering::SeqCst; #[derive(Debug)] -enum GlobalVarState -where - T: std::fmt::Debug, -{ - Unassigned, - Some(T), - Taken, +struct OldHandlers { + sigbus: SigAction, + sigsegv: SigAction, } +static OLD_HANDLERS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); + static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); -static mut OLD_SIGBUS_HANDLER: GlobalVarState = GlobalVarState::Unassigned; -static mut OLD_SIGSEGV_HANDLER: GlobalVarState = GlobalVarState::Unassigned; static mut RESOLVE_FRAMES: bool = false; static mut METADATA_STRING: Option = None; @@ -110,7 +106,6 @@ pub fn setup_receiver( Ok(()) } - pub fn replace_receiver( config: &CrashtrackerConfiguration, metadata: &CrashtrackerMetadata, @@ -212,33 +207,26 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { Ok(()) } +// TODO, there is a small race condition here, but we can keep it small pub fn register_crash_handlers(config: &CrashtrackerConfiguration) -> anyhow::Result<()> { + anyhow::ensure!(OLD_HANDLERS.load(SeqCst).is_null()); unsafe { RESOLVE_FRAMES = config.resolve_frames == CrashtrackerResolveFrames::ExperimentalInProcess; if config.create_alt_stack { set_alt_stack()?; } - register_signal_handler(signal::SIGBUS)?; - register_signal_handler(signal::SIGSEGV)?; + let sigbus = register_signal_handler(signal::SIGBUS)?; + let sigsegv = register_signal_handler(signal::SIGSEGV)?; + let boxed_ptr = Box::into_raw(Box::new(OldHandlers { sigbus, sigsegv })); + + let res = OLD_HANDLERS.compare_exchange(ptr::null_mut(), boxed_ptr, SeqCst, SeqCst); + anyhow::ensure!(res.is_ok()); } Ok(()) } -unsafe fn get_slot( - signal_type: signal::Signal, -) -> anyhow::Result<&'static mut GlobalVarState> { - let slot = match signal_type { - signal::SIGBUS => unsafe { &mut OLD_SIGBUS_HANDLER }, - signal::SIGSEGV => unsafe { &mut OLD_SIGSEGV_HANDLER }, - _ => anyhow::bail!("unexpected signal {signal_type}"), - }; - Ok(slot) -} - -unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result<()> { - let slot = get_slot(signal_type)?; - +unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result { // https://www.gnu.org/software/libc/manual/html_node/Flags-for-Sigaction.html // =============== // If this flag is set for a particular signal number, the system uses the @@ -256,35 +244,18 @@ unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result ); let old_handler = signal::sigaction(signal_type, &sig_action)?; - let should_be_empty = std::mem::replace(slot, GlobalVarState::Some(old_handler)); - anyhow::ensure!( - matches!(should_be_empty, GlobalVarState::Unassigned), - "Error registering crash handler: old_handler already existed {should_be_empty:?}" - ); - - Ok(()) -} - -unsafe fn replace_signal_handler(signal_type: signal::Signal) -> anyhow::Result<()> { - let slot = get_slot(signal_type)?; - - match std::mem::replace(slot, GlobalVarState::Taken) { - GlobalVarState::Some(old_handler) => unsafe { - signal::sigaction(signal_type, &old_handler)? - }, - x => anyhow::bail!("Cannot restore signal handler for {signal_type}: {x:?}"), - }; - Ok(()) + Ok(old_handler) } pub fn restore_old_handlers() -> anyhow::Result<()> { - // Restore the old handler, so that the current handler can return to it. - // Although this is technically UB, this is what Rust does in the same case. - // https://github.com/rust-lang/rust/blob/master/library/std/src/sys/unix/stack_overflow.rs#L75 - unsafe { - replace_signal_handler(signal::SIGSEGV)?; - replace_signal_handler(signal::SIGBUS)?; - } + let prev = OLD_HANDLERS.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!prev.is_null()); + // Safety: The only nonnull pointer stored here comes from Box::into_raw() + let prev = unsafe { Box::from_raw(prev) }; + // Safety: The value restored here was returned from a previous sigaction call + unsafe { signal::sigaction(signal::SIGBUS, &prev.sigbus)? }; + unsafe { signal::sigaction(signal::SIGSEGV, &prev.sigsegv)? }; + Ok(()) } From fee36192e6d4642ee63cbfa4589991ce399b6df2 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Wed, 14 Feb 2024 18:04:54 -0500 Subject: [PATCH 04/16] Leak the old handlers during a crash --- crashtracker/src/api.rs | 2 +- crashtracker/src/crash_handler.rs | 17 ++++++++++++----- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index f9d8cb79e..18dc397a0 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -100,7 +100,7 @@ impl CrashtrackerConfiguration { /// This function is not atomic. A crash during its execution may lead to /// unexpected crash-handling behaviour. pub fn shutdown_crash_handler() -> anyhow::Result<()> { - restore_old_handlers()?; + restore_old_handlers(false)?; shutdown_receiver()?; Ok(()) } diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index c4383d275..7ef2afaae 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -16,7 +16,7 @@ use libc::{ }; use nix::sys::signal; use nix::sys::signal::{SaFlags, SigAction, SigHandler}; -use std::fs::File; +use std::fs::{File, Metadata}; use std::io::Write; use std::process::{Command, Stdio}; use std::ptr; @@ -30,8 +30,10 @@ struct OldHandlers { } static OLD_HANDLERS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); - static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); +static METADATA: AtomicPtr<(CrashtrackerMetadata, String)> = AtomicPtr::new(ptr::null_mut()); +static CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); + static mut RESOLVE_FRAMES: bool = false; static mut METADATA_STRING: Option = None; @@ -116,6 +118,7 @@ pub fn replace_receiver( !old_receiver.is_null(), "Error updating crash handler receiver: receiver did not already exist" ); + // Safety: This was only ever created out of Box::into_raw let mut old_receiver: Box = unsafe { Box::from_raw(old_receiver) }; // Close the stdin handle so we don't have two open copies // TODO: dropping the old receiver at the end of this function might do this automatically? @@ -144,7 +147,7 @@ extern "C" fn handle_posix_signal(signum: i32) { // Safety: We've already crashed, this is a best effort to chain to the old // behaviour. Do this first to prevent recursive activation if this handler // itself crashes (e.g. while calculating stacktrace) - let _ = restore_old_handlers(); + let _ = restore_old_handlers(true); let _ = handle_posix_signal_impl(signum); // return to old handler (chain). See comments on `restore_old_handler`. @@ -247,7 +250,7 @@ unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result Ok(old_handler) } -pub fn restore_old_handlers() -> anyhow::Result<()> { +pub fn restore_old_handlers(leak: bool) -> anyhow::Result<()> { let prev = OLD_HANDLERS.swap(ptr::null_mut(), SeqCst); anyhow::ensure!(!prev.is_null()); // Safety: The only nonnull pointer stored here comes from Box::into_raw() @@ -255,7 +258,11 @@ pub fn restore_old_handlers() -> anyhow::Result<()> { // Safety: The value restored here was returned from a previous sigaction call unsafe { signal::sigaction(signal::SIGBUS, &prev.sigbus)? }; unsafe { signal::sigaction(signal::SIGSEGV, &prev.sigsegv)? }; - + // We want to avoid freeing memory inside the handler, so just leak it + // This is fine since we're crashing anyway at this point + if leak { + Box::leak(prev); + } Ok(()) } From 6e29ff0254723ac76e2b661c17fae09566a808c2 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Wed, 14 Feb 2024 18:24:54 -0500 Subject: [PATCH 05/16] Better way of handling metadata --- crashtracker/src/api.rs | 6 ++--- crashtracker/src/crash_handler.rs | 37 ++++++++++++++++++------------- profiling-ffi/src/crashtracker.rs | 2 +- 3 files changed, 26 insertions(+), 19 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 18dc397a0..852e8a960 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -134,7 +134,7 @@ pub fn on_fork( // https://man7.org/linux/man-pages/man2/sigaltstack.2.html // See function level comment about why we do this. - replace_receiver(&config, &metadata)?; + replace_receiver(&config, metadata)?; Ok(()) } @@ -154,7 +154,7 @@ pub fn init( ) -> anyhow::Result<()> { // Setup the receiver first, so that if there is a crash detected it has // somewhere to go. - setup_receiver(&config, &metadata)?; + setup_receiver(&config, metadata)?; register_crash_handlers(&config)?; Ok(()) } @@ -218,7 +218,7 @@ fn test_crash() { "family".to_string(), vec![tag], ); - update_metadata(&metadata2).expect("metadata"); + update_metadata(metadata2).expect("metadata"); let p: *const u32 = std::ptr::null(); let q = unsafe { *p }; diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index 7ef2afaae..b3b12cc96 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -16,7 +16,7 @@ use libc::{ }; use nix::sys::signal; use nix::sys::signal::{SaFlags, SigAction, SigHandler}; -use std::fs::{File, Metadata}; +use std::fs::File; use std::io::Write; use std::process::{Command, Stdio}; use std::ptr; @@ -32,14 +32,13 @@ struct OldHandlers { static OLD_HANDLERS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static METADATA: AtomicPtr<(CrashtrackerMetadata, String)> = AtomicPtr::new(ptr::null_mut()); -static CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); +static _CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); static mut RESOLVE_FRAMES: bool = false; -static mut METADATA_STRING: Option = None; fn make_receiver( config: &CrashtrackerConfiguration, - metadata: &CrashtrackerMetadata, + metadata: CrashtrackerMetadata, ) -> anyhow::Result { // TODO: currently create the file in write mode. Would append make more sense? let stderr = if let Some(filename) = &config.stderr_filename { @@ -89,15 +88,22 @@ fn make_receiver( /// ATOMICITY: /// This function is not atomic. A crash during its execution may lead to /// unexpected crash-handling behaviour. -pub fn update_metadata(metadata: &CrashtrackerMetadata) -> anyhow::Result<()> { +pub fn update_metadata(metadata: CrashtrackerMetadata) -> anyhow::Result<()> { let metadata_string = serde_json::to_string(&metadata)?; - unsafe { METADATA_STRING = Some(metadata_string) }; + let box_ptr = Box::into_raw(Box::new((metadata, metadata_string))); + let old = METADATA.swap(box_ptr, SeqCst); + if !old.is_null() { + // Safety: This can only come from a box above. + unsafe { + std::mem::drop(Box::from_raw(old)); + } + } Ok(()) } pub fn setup_receiver( config: &CrashtrackerConfiguration, - metadata: &CrashtrackerMetadata, + metadata: CrashtrackerMetadata, ) -> anyhow::Result<()> { let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); let old_receiver = RECEIVER.swap(new_receiver, SeqCst); @@ -110,7 +116,7 @@ pub fn setup_receiver( pub fn replace_receiver( config: &CrashtrackerConfiguration, - metadata: &CrashtrackerMetadata, + metadata: CrashtrackerMetadata, ) -> anyhow::Result<()> { let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); let old_receiver: *mut std::process::Child = RECEIVER.swap(new_receiver, SeqCst); @@ -153,14 +159,15 @@ extern "C" fn handle_posix_signal(signum: i32) { // return to old handler (chain). See comments on `restore_old_handler`. } -fn emit_metadata(w: &mut impl Write) -> anyhow::Result<()> { +fn emit_and_consume_metadata(w: &mut impl Write) -> anyhow::Result<()> { writeln!(w, "{DD_CRASHTRACK_BEGIN_METADATA}")?; + let metadata_ptr = METADATA.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!metadata_ptr.is_null()); + let (_metadata, metadata_string) = unsafe { metadata_ptr.as_ref().context("metadata ptr")? }; - let metadata = unsafe { &METADATA_STRING.as_ref().context("Expected metadata")? }; - writeln!(w, "{}", metadata)?; - + writeln!(w, "{}", metadata_string)?; writeln!(w, "{DD_CRASHTRACK_END_METADATA}")?; - + // Leak the metadata to avoid calling `drop` during a crash Ok(()) } @@ -179,7 +186,7 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { let pipe = receiver.stdin.as_mut().unwrap(); - emit_metadata(pipe)?; + emit_and_consume_metadata(pipe)?; writeln!(pipe, "{DD_CRASHTRACK_BEGIN_SIGINFO}")?; writeln!(pipe, "{{\"signum\": {signum}, \"signame\": \"{signame}\"}}")?; @@ -258,7 +265,7 @@ pub fn restore_old_handlers(leak: bool) -> anyhow::Result<()> { // Safety: The value restored here was returned from a previous sigaction call unsafe { signal::sigaction(signal::SIGBUS, &prev.sigbus)? }; unsafe { signal::sigaction(signal::SIGSEGV, &prev.sigsegv)? }; - // We want to avoid freeing memory inside the handler, so just leak it + // We want to avoid freeing memory inside the handler, so just leak it // This is fine since we're crashing anyway at this point if leak { Box::leak(prev); diff --git a/profiling-ffi/src/crashtracker.rs b/profiling-ffi/src/crashtracker.rs index d2c4582ac..fa94e4cba 100644 --- a/profiling-ffi/src/crashtracker.rs +++ b/profiling-ffi/src/crashtracker.rs @@ -135,7 +135,7 @@ unsafe fn ddog_prof_crashtracker_update_metadata_impl( metadata: CrashtrackerMetadata, ) -> anyhow::Result<()> { let metadata = metadata.try_into()?; - datadog_crashtracker::update_metadata(&metadata) + datadog_crashtracker::update_metadata(metadata) } #[no_mangle] From 2fbd711842cdf1023600d80b1ae6ea19e6915d20 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Thu, 15 Feb 2024 14:35:52 -0500 Subject: [PATCH 06/16] Better way of handling config --- crashtracker/src/api.rs | 14 ++-- crashtracker/src/constants.rs | 2 + crashtracker/src/crash_handler.rs | 114 ++++++++++++++++++++---------- crashtracker/src/lib.rs | 2 +- crashtracker/src/receiver.rs | 40 +++++++---- profiling-ffi/src/crashtracker.rs | 20 ++++++ 6 files changed, 132 insertions(+), 60 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 852e8a960..75de52e20 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -2,12 +2,11 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. #![cfg(unix)] +use crate::crash_handler::{register_crash_handlers, setup_receiver}; + use super::{ counters::reset_counters, - crash_handler::{ - register_crash_handlers, replace_receiver, restore_old_handlers, setup_receiver, - shutdown_receiver, - }, + crash_handler::{replace_receiver, restore_old_handlers, shutdown_receiver}, }; use ddcommon::tag::Tag; use ddcommon::Endpoint; @@ -134,7 +133,7 @@ pub fn on_fork( // https://man7.org/linux/man-pages/man2/sigaltstack.2.html // See function level comment about why we do this. - replace_receiver(&config, metadata)?; + replace_receiver(config, metadata)?; Ok(()) } @@ -154,8 +153,9 @@ pub fn init( ) -> anyhow::Result<()> { // Setup the receiver first, so that if there is a crash detected it has // somewhere to go. - setup_receiver(&config, metadata)?; - register_crash_handlers(&config)?; + let create_alt_stack = config.create_alt_stack; + setup_receiver(config, metadata)?; + register_crash_handlers(create_alt_stack)?; Ok(()) } diff --git a/crashtracker/src/constants.rs b/crashtracker/src/constants.rs index 6dfeafa8c..a6136cf61 100644 --- a/crashtracker/src/constants.rs +++ b/crashtracker/src/constants.rs @@ -1,12 +1,14 @@ // Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0. // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. +pub const DD_CRASHTRACK_BEGIN_CONFIG: &str = "DD_CRASHTRACK_BEGIN_CONFIG"; pub const DD_CRASHTRACK_BEGIN_COUNTERS: &str = "DD_CRASHTRACK_BEGIN_COUNTERS"; pub const DD_CRASHTRACK_BEGIN_FILE: &str = "DD_CRASHTRACK_BEGIN_FILE"; pub const DD_CRASHTRACK_BEGIN_METADATA: &str = "DD_CRASHTRACK_BEGIN_METADATA"; pub const DD_CRASHTRACK_BEGIN_SIGINFO: &str = "DD_CRASHTRACK_BEGIN_SIGINFO"; pub const DD_CRASHTRACK_BEGIN_STACKTRACE: &str = "DD_CRASHTRACK_BEGIN_STACKTRACE"; pub const DD_CRASHTRACK_DONE: &str = "DD_CRASHTRACK_DONE"; +pub const DD_CRASHTRACK_END_CONFIG: &str = "DD_CRASHTRACK_END_CONFIG"; pub const DD_CRASHTRACK_END_COUNTERS: &str = "DD_CRASHTRACK_END_COUNTERS"; pub const DD_CRASHTRACK_END_FILE: &str = "DD_CRASHTRACK_END_FILE"; pub const DD_CRASHTRACK_END_METADATA: &str = "DD_CRASHTRACK_END_METADATA"; diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index b3b12cc96..6d6e23926 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -32,12 +32,10 @@ struct OldHandlers { static OLD_HANDLERS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static METADATA: AtomicPtr<(CrashtrackerMetadata, String)> = AtomicPtr::new(ptr::null_mut()); -static _CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); - -static mut RESOLVE_FRAMES: bool = false; +static CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); fn make_receiver( - config: &CrashtrackerConfiguration, + config: CrashtrackerConfiguration, metadata: CrashtrackerMetadata, ) -> anyhow::Result { // TODO: currently create the file in write mode. Would append make more sense? @@ -64,15 +62,9 @@ fn make_receiver( &config.path_to_receiver_binary ))?; - // Write the args into the receiver. - // Use the pipe to avoid secrets ending up on the commandline - writeln!( - receiver.stdin.as_ref().unwrap(), - "{}", - serde_json::to_string(&config)? - )?; - update_metadata(metadata)?; + update_config(config)?; + Ok(receiver) } @@ -83,11 +75,10 @@ fn make_receiver( /// PRECONDITIONS: /// None /// SAFETY: -/// Crash-tracking functions are not reentrant. +/// Crash-tracking functions are not guaranteed to be reentrant. /// No other crash-handler functions should be called concurrently. /// ATOMICITY: -/// This function is not atomic. A crash during its execution may lead to -/// unexpected crash-handling behaviour. +/// This function uses a swap on an atomic pointer. pub fn update_metadata(metadata: CrashtrackerMetadata) -> anyhow::Result<()> { let metadata_string = serde_json::to_string(&metadata)?; let box_ptr = Box::into_raw(Box::new((metadata, metadata_string))); @@ -101,8 +92,32 @@ pub fn update_metadata(metadata: CrashtrackerMetadata) -> anyhow::Result<()> { Ok(()) } +/// Updates the crashtracker config for this process +/// Config is stored in a global variable and sent to the crashtracking +/// receiver when a crash occurs. +/// +/// PRECONDITIONS: +/// None +/// SAFETY: +/// Crash-tracking functions are not guaranteed to be reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function uses a swap on an atomic pointer. +pub fn update_config(config: CrashtrackerConfiguration) -> anyhow::Result<()> { + let config_string = serde_json::to_string(&config)?; + let box_ptr = Box::into_raw(Box::new((config, config_string))); + let old = CONFIG.swap(box_ptr, SeqCst); + if !old.is_null() { + // Safety: This can only come from a box above. + unsafe { + std::mem::drop(Box::from_raw(old)); + } + } + Ok(()) +} + pub fn setup_receiver( - config: &CrashtrackerConfiguration, + config: CrashtrackerConfiguration, metadata: CrashtrackerMetadata, ) -> anyhow::Result<()> { let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); @@ -115,7 +130,7 @@ pub fn setup_receiver( } pub fn replace_receiver( - config: &CrashtrackerConfiguration, + config: CrashtrackerConfiguration, metadata: CrashtrackerMetadata, ) -> anyhow::Result<()> { let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); @@ -159,23 +174,21 @@ extern "C" fn handle_posix_signal(signum: i32) { // return to old handler (chain). See comments on `restore_old_handler`. } -fn emit_and_consume_metadata(w: &mut impl Write) -> anyhow::Result<()> { - writeln!(w, "{DD_CRASHTRACK_BEGIN_METADATA}")?; - let metadata_ptr = METADATA.swap(ptr::null_mut(), SeqCst); - anyhow::ensure!(!metadata_ptr.is_null()); - let (_metadata, metadata_string) = unsafe { metadata_ptr.as_ref().context("metadata ptr")? }; +fn emit_config(w: &mut impl Write, config_str: &str) -> anyhow::Result<()> { + writeln!(w, "{DD_CRASHTRACK_BEGIN_CONFIG}")?; + writeln!(w, "{}", config_str)?; + writeln!(w, "{DD_CRASHTRACK_END_CONFIG}")?; + Ok(()) +} - writeln!(w, "{}", metadata_string)?; +fn emit_metadata(w: &mut impl Write, metadata_str: &str) -> anyhow::Result<()> { + writeln!(w, "{DD_CRASHTRACK_BEGIN_METADATA}")?; + writeln!(w, "{}", metadata_str)?; writeln!(w, "{DD_CRASHTRACK_END_METADATA}")?; - // Leak the metadata to avoid calling `drop` during a crash Ok(()) } -fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { - let receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); - anyhow::ensure!(!receiver.is_null(), "No crashtracking receiver"); - let receiver = unsafe { receiver.as_mut().context("")? }; - +fn emit_siginfo(w: &mut impl Write, signum: i32) -> anyhow::Result<()> { let signame = if signum == libc::SIGSEGV { "SIGSEGV" } else if signum == libc::SIGBUS { @@ -184,14 +197,34 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { "UNKNOWN" }; - let pipe = receiver.stdin.as_mut().unwrap(); + writeln!(w, "{DD_CRASHTRACK_BEGIN_SIGINFO}")?; + writeln!(w, "{{\"signum\": {signum}, \"signame\": \"{signame}\"}}")?; + writeln!(w, "{DD_CRASHTRACK_END_SIGINFO}")?; + Ok(()) +} + +fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { + // Leak receiver, config, and metadata to avoid calling 'drop' during a crash + let receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!receiver.is_null(), "No crashtracking receiver"); + let receiver = unsafe { receiver.as_mut().context("")? }; - emit_and_consume_metadata(pipe)?; + let config = CONFIG.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!config.is_null(), "No crashtracking config"); + let (config, config_str) = unsafe { config.as_mut().context("")? }; - writeln!(pipe, "{DD_CRASHTRACK_BEGIN_SIGINFO}")?; - writeln!(pipe, "{{\"signum\": {signum}, \"signame\": \"{signame}\"}}")?; - writeln!(pipe, "{DD_CRASHTRACK_END_SIGINFO}")?; + let metadata_ptr = METADATA.swap(ptr::null_mut(), SeqCst); + anyhow::ensure!(!metadata_ptr.is_null()); + let (_metadata, metadata_string) = unsafe { metadata_ptr.as_ref().context("metadata ptr")? }; + + let pipe = receiver + .stdin + .as_mut() + .context("Crashtracker: Can't get pipe")?; + emit_metadata(pipe, metadata_string)?; + emit_config(pipe, config_str)?; + emit_siginfo(pipe, signum)?; emit_counters(pipe)?; #[cfg(target_os = "linux")] @@ -203,7 +236,12 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { // In fact, if we look into the code here, we see mallocs. // https://doc.rust-lang.org/src/std/backtrace.rs.html#332 // Do this last, so even if it crashes, we still get the other info. - unsafe { emit_backtrace_by_frames(pipe, RESOLVE_FRAMES)? }; + unsafe { + emit_backtrace_by_frames( + pipe, + config.resolve_frames == CrashtrackerResolveFrames::ExperimentalInProcess, + )? + }; writeln!(pipe, "{DD_CRASHTRACK_DONE}")?; pipe.flush()?; @@ -218,12 +256,10 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { } // TODO, there is a small race condition here, but we can keep it small -pub fn register_crash_handlers(config: &CrashtrackerConfiguration) -> anyhow::Result<()> { +pub fn register_crash_handlers(create_alt_stack: bool) -> anyhow::Result<()> { anyhow::ensure!(OLD_HANDLERS.load(SeqCst).is_null()); unsafe { - RESOLVE_FRAMES = config.resolve_frames == CrashtrackerResolveFrames::ExperimentalInProcess; - - if config.create_alt_stack { + if create_alt_stack { set_alt_stack()?; } let sigbus = register_signal_handler(signal::SIGBUS)?; diff --git a/crashtracker/src/lib.rs b/crashtracker/src/lib.rs index 1782d1cdc..96ee6a04d 100644 --- a/crashtracker/src/lib.rs +++ b/crashtracker/src/lib.rs @@ -60,6 +60,6 @@ mod receiver; pub use api::*; pub use constants::*; pub use counters::{begin_profiling_op, end_profiling_op, ProfilingOpTypes}; -pub use crash_handler::update_metadata; +pub use crash_handler::{update_config, update_metadata}; pub use crash_info::*; pub use receiver::receiver_entry_point; diff --git a/crashtracker/src/receiver.rs b/crashtracker/src/receiver.rs index 51364a91a..442adeef3 100644 --- a/crashtracker/src/receiver.rs +++ b/crashtracker/src/receiver.rs @@ -3,7 +3,7 @@ use super::*; use anyhow::Context; -use std::{io::BufRead, time::Duration}; +use std::time::Duration; /// Receives data from a crash collector via a pipe on `stdin`, formats it into /// `CrashInfo` json, and emits it to the endpoint/file defined in `config`. @@ -15,13 +15,9 @@ use std::{io::BufRead, time::Duration}; /// See comments in [profiling/crashtracker/mod.rs] for a full architecture /// description. pub fn receiver_entry_point() -> anyhow::Result<()> { - let mut config = String::new(); - std::io::stdin().lock().read_line(&mut config)?; - let config: CrashtrackerConfiguration = serde_json::from_str(&config)?; - match receive_report(std::io::stdin().lock())? { CrashReportStatus::NoCrash => Ok(()), - CrashReportStatus::CrashReport(crash_info) => { + CrashReportStatus::CrashReport(config, crash_info) => { if config.resolve_frames == CrashtrackerResolveFrames::ExperimentalInReceiver { todo!("Processing names in the receiver is WIP"); } @@ -32,7 +28,7 @@ pub fn receiver_entry_point() -> anyhow::Result<()> { } Ok(()) } - CrashReportStatus::PartialCrashReport(_, _) => todo!(), + CrashReportStatus::PartialCrashReport(..) => todo!(), } } @@ -42,6 +38,7 @@ pub fn receiver_entry_point() -> anyhow::Result<()> { /// to the CrashReport. #[derive(Debug)] enum StdinState { + Config, Counters, Done, File(String, Vec), @@ -57,10 +54,21 @@ enum StdinState { /// Once we reach the end of a block, append the block's data to `crashinfo`. fn process_line( crashinfo: &mut CrashInfo, + config: &mut Option, line: String, state: StdinState, ) -> anyhow::Result { let next = match state { + StdinState::Config if line.starts_with(DD_CRASHTRACK_END_CONFIG) => StdinState::Waiting, + StdinState::Config => { + if config.is_some() { + // The config might contain sensitive data, don't log it. + eprint!("Unexpected double config"); + } + std::mem::swap(config, &mut Some(serde_json::from_str(&line)?)); + StdinState::Config + } + StdinState::Counters if line.starts_with(DD_CRASHTRACK_END_COUNTERS) => StdinState::Waiting, StdinState::Counters => { let v: serde_json::Value = serde_json::from_str(&line)?; @@ -95,6 +103,7 @@ fn process_line( crashinfo.set_metadata(metadata)?; StdinState::Metadata } + StdinState::SigInfo if line.starts_with(DD_CRASHTRACK_END_SIGINFO) => StdinState::Waiting, StdinState::SigInfo => { let siginfo = serde_json::from_str(&line)?; @@ -113,6 +122,7 @@ fn process_line( StdinState::StackTrace(stacktrace) } + StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_CONFIG) => StdinState::Config, StdinState::Waiting if line.starts_with(DD_CRASHTRACK_BEGIN_COUNTERS) => { StdinState::Counters } @@ -139,25 +149,26 @@ fn process_line( enum CrashReportStatus { NoCrash, - CrashReport(CrashInfo), - PartialCrashReport(CrashInfo, StdinState), + CrashReport(CrashtrackerConfiguration, CrashInfo), + PartialCrashReport(CrashtrackerConfiguration, CrashInfo, StdinState), } -/// Listens to `stdin`, reading it line by line, until +/// Listens to `stream`, reading it line by line, until /// 1. A crash-report is received, in which case it is processed for upload /// 2. `stdin` closes without a crash report (i.e. if the parent terminated /// normally) /// In the case where the parent failed to transfer a full crash-report /// (for instance if it crashed while calculating the crash-report), we return /// a PartialCrashReport. -// TODO: Make this take a buffer, rather than assuming input on stdin fn receive_report(stream: impl std::io::BufRead) -> anyhow::Result { let mut crashinfo = CrashInfo::new(); let mut stdin_state = StdinState::Waiting; + let mut config = None; + //TODO: This assumes that the input is valid UTF-8. for line in stream.lines() { let line = line?; - stdin_state = process_line(&mut crashinfo, line, stdin_state)?; + stdin_state = process_line(&mut crashinfo, &mut config, line, stdin_state)?; } if !crashinfo.crash_seen() { @@ -169,12 +180,15 @@ fn receive_report(stream: impl std::io::BufRead) -> anyhow::Result ProfileResult { } } +#[no_mangle] +#[must_use] +pub unsafe extern "C" fn ddog_prof_crashtracker_update_config( + config: CrashtrackerConfiguration, +) -> ProfileResult { + match ddog_prof_crashtracker_update_config_impl(config) { + Ok(_) => ProfileResult::Ok(true), + Err(err) => ProfileResult::Err(Error::from( + err.context("ddog_prof_crashtracker_update_config failed"), + )), + } +} + +unsafe fn ddog_prof_crashtracker_update_config_impl( + config: CrashtrackerConfiguration, +) -> anyhow::Result<()> { + let config = config.try_into()?; + datadog_crashtracker::update_config(config) +} + #[no_mangle] #[must_use] pub unsafe extern "C" fn ddog_prof_crashtracker_update_metadata( From b704dca3c14340b0ef7948b9bd36ad81a73c7c4d Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Thu, 15 Feb 2024 14:38:40 -0500 Subject: [PATCH 07/16] Comment out the test --- crashtracker/src/api.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 75de52e20..13dc29ea9 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -170,7 +170,7 @@ pub fn init( // mkdir /tmp/crashreports // look in /tmp/crashreports for the crash reports and output files // Commented out since `ignore` doesn't work in CI. -#[test] +//#[test] fn test_crash() { use crate::begin_profiling_op; use crate::update_metadata; From 25a89eafe99b4bc328fb31bd74e5744c7159aa71 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Thu, 15 Feb 2024 14:40:34 -0500 Subject: [PATCH 08/16] PR Comment --- crashtracker/src/crash_info.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crashtracker/src/crash_info.rs b/crashtracker/src/crash_info.rs index 25ab98bfd..af69e2bb4 100644 --- a/crashtracker/src/crash_info.rs +++ b/crashtracker/src/crash_info.rs @@ -112,6 +112,7 @@ impl CrashInfo { self.metadata = Some(metadata); Ok(()) } + pub fn set_siginfo(&mut self, siginfo: SigInfo) -> anyhow::Result<()> { anyhow::ensure!(self.siginfo.is_none()); self.siginfo = Some(siginfo); @@ -159,9 +160,6 @@ impl CrashInfo { } } - //let site = "intake.profile.datad0g.com/api/v2/profile"; - //let site = "datad0g.com"; - //let api_key = std::env::var("DD_API_KEY")?; let data = serde_json::to_vec(self)?; let metadata = &self.metadata.as_ref().context("Missing metadata")?; From 8457cd6b2c843478d366678a53e7fb7ff3147183 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Fri, 16 Feb 2024 17:31:27 -0500 Subject: [PATCH 09/16] Make init idempotent, as per PR request --- crashtracker/src/api.rs | 62 ++++--------- crashtracker/src/crash_handler.rs | 141 ++++++++++++++++++++++-------- 2 files changed, 120 insertions(+), 83 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 13dc29ea9..bc16a74fd 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -2,11 +2,14 @@ // This product includes software developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present Datadog, Inc. #![cfg(unix)] -use crate::crash_handler::{register_crash_handlers, setup_receiver}; +use crate::{ + crash_handler::{ensure_receiver, register_crash_handlers}, + update_config, update_metadata, +}; use super::{ counters::reset_counters, - crash_handler::{replace_receiver, restore_old_handlers, shutdown_receiver}, + crash_handler::{restore_old_handlers, shutdown_receiver, update_reciever_after_fork}, }; use ddcommon::tag::Tag; use ddcommon::Endpoint; @@ -132,15 +135,18 @@ pub fn on_fork( // The altstack (if any) is similarly unaffected by fork: // https://man7.org/linux/man-pages/man2/sigaltstack.2.html + update_metadata(metadata)?; + update_config(config.clone())?; + // See function level comment about why we do this. - replace_receiver(config, metadata)?; + update_reciever_after_fork(config)?; Ok(()) } -/// Initilize the crash-tracking infrasturcture. +/// Initialize the crash-tracking infrastructure. /// /// PRECONDITIONS: -/// This function assumes that the crash-tracker is uninitialized +/// None. /// SAFETY: /// Crash-tracking functions are not reentrant. /// No other crash-handler functions should be called concurrently. @@ -154,7 +160,9 @@ pub fn init( // Setup the receiver first, so that if there is a crash detected it has // somewhere to go. let create_alt_stack = config.create_alt_stack; - setup_receiver(config, metadata)?; + update_metadata(metadata)?; + update_config(config.clone())?; + ensure_receiver(config)?; register_crash_handlers(create_alt_stack)?; Ok(()) } @@ -170,10 +178,9 @@ pub fn init( // mkdir /tmp/crashreports // look in /tmp/crashreports for the crash reports and output files // Commented out since `ignore` doesn't work in CI. -//#[test] +#[test] fn test_crash() { use crate::begin_profiling_op; - use crate::update_metadata; use chrono::Utc; use ddcommon::parse_uri; @@ -224,42 +231,3 @@ fn test_crash() { let q = unsafe { *p }; assert_eq!(q, 3); } - -// To test on docker: -/* -docker run -it --rm -v $DATADOG_ROOT:/code -w/code ubuntu -apt update && apt upgrade -DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ -build-essential \ -ca-certificates \ -curl \ -git \ -libbz2-dev \ -libffi-dev \ -liblzma-dev \ -libncurses5-dev \ -libncursesw5-dev \ -libreadline-dev \ -libsqlite3-dev \ -libssl-dev \ -libxml2-dev \ -libxmlsec1-dev \ -llvm \ -make \ -mecab-ipadic-utf8 \ -tk-dev \ -tzdata \ -wget \ -xz-utils \ -zlib1g-dev - -curl https://sh.rustup.rs -sSf | sh -source "$HOME/.cargo/env" -cargo install cbindgen -cargo build --target-dir /tmp/libdatadog/ -mkdir /tmp/crashreports/ -git clone https://github.com/DataDog/libdatadog.git -cd libdatadog -git checkout dsn/crash-handler-api -cargo test test_crash -- --ignored -*/ diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index 6d6e23926..0505f87c3 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -20,8 +20,8 @@ use std::fs::File; use std::io::Write; use std::process::{Command, Stdio}; use std::ptr; -use std::sync::atomic::AtomicPtr; use std::sync::atomic::Ordering::SeqCst; +use std::sync::atomic::{AtomicBool, AtomicPtr}; #[derive(Debug)] struct OldHandlers { @@ -29,15 +29,19 @@ struct OldHandlers { sigsegv: SigAction, } +// These represent data used by the crashtracker. +// Using mutexes inside a signal handler is not allowed, so use `AtomicPtr` +// instead to get atomicity. +// These should always be either: null_mut, or `Box::into_raw()` +// This means that we can always clean up the memory inside one of these using +// `Box::from_raw` to recreate the box, then dropping it. +static ALTSTACK_INIT: AtomicBool = AtomicBool::new(false); static OLD_HANDLERS: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static RECEIVER: AtomicPtr = AtomicPtr::new(ptr::null_mut()); static METADATA: AtomicPtr<(CrashtrackerMetadata, String)> = AtomicPtr::new(ptr::null_mut()); static CONFIG: AtomicPtr<(CrashtrackerConfiguration, String)> = AtomicPtr::new(ptr::null_mut()); -fn make_receiver( - config: CrashtrackerConfiguration, - metadata: CrashtrackerMetadata, -) -> anyhow::Result { +fn make_receiver(config: CrashtrackerConfiguration) -> anyhow::Result { // TODO: currently create the file in write mode. Would append make more sense? let stderr = if let Some(filename) = &config.stderr_filename { File::create(filename)?.into() @@ -62,9 +66,6 @@ fn make_receiver( &config.path_to_receiver_binary ))?; - update_metadata(metadata)?; - update_config(config)?; - Ok(receiver) } @@ -116,31 +117,57 @@ pub fn update_config(config: CrashtrackerConfiguration) -> anyhow::Result<()> { Ok(()) } -pub fn setup_receiver( - config: CrashtrackerConfiguration, - metadata: CrashtrackerMetadata, -) -> anyhow::Result<()> { - let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); - let old_receiver = RECEIVER.swap(new_receiver, SeqCst); - anyhow::ensure!( - old_receiver.is_null(), - "Error registering crash handler receiver: receiver already existed" - ); +/// Ensures there is a receiver running. +/// PRECONDITIONS: +/// None +/// SAFETY: +/// Crash-tracking functions are not guaranteed to be reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function uses a compare_and_exchange on an atomic pointer. +/// If two simultaneous calls to this function occur, the first will win, +/// and the second will cleanup the redundant receiver. +pub fn ensure_receiver(config: CrashtrackerConfiguration) -> anyhow::Result<()> { + if !RECEIVER.load(SeqCst).is_null() { + // Receiver already running + return Ok(()); + } + + let new_receiver = Box::into_raw(Box::new(make_receiver(config)?)); + let res = RECEIVER.compare_exchange(ptr::null_mut(), new_receiver, SeqCst, SeqCst); + if res.is_err() { + // Race condition: Someone else setup the receiver between check and now. + // Cleanup after ourselves + // Safety: we just took it from a box above, and own the only ref since + // the compare_exchange failed. + let mut new_receiver = unsafe { Box::from_raw(new_receiver) }; + new_receiver.kill()?; + new_receiver.wait()?; + } + Ok(()) } -pub fn replace_receiver( - config: CrashtrackerConfiguration, - metadata: CrashtrackerMetadata, -) -> anyhow::Result<()> { - let new_receiver = Box::into_raw(Box::new(make_receiver(config, metadata)?)); +/// Each fork needs its own receiver. This function should run in the child +/// after a fork to spawn a new receiver for the child. +/// PRECONDITIONS: +/// None +/// SAFETY: +/// Crash-tracking functions are not guaranteed to be reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function uses a compare_and_exchange on an atomic pointer. +/// If two simultaneous calls to this function occur, the first will win, +/// and the second will cleanup the redundant receiver. +pub fn update_reciever_after_fork(config: CrashtrackerConfiguration) -> anyhow::Result<()> { + let new_receiver = Box::into_raw(Box::new(make_receiver(config)?)); let old_receiver: *mut std::process::Child = RECEIVER.swap(new_receiver, SeqCst); anyhow::ensure!( !old_receiver.is_null(), "Error updating crash handler receiver: receiver did not already exist" ); // Safety: This was only ever created out of Box::into_raw - let mut old_receiver: Box = unsafe { Box::from_raw(old_receiver) }; + let mut old_receiver = unsafe { Box::from_raw(old_receiver) }; // Close the stdin handle so we don't have two open copies // TODO: dropping the old receiver at the end of this function might do this automatically? drop(old_receiver.stdin.take()); @@ -151,16 +178,29 @@ pub fn replace_receiver( Ok(()) } +/// Shuts down a receiver, +/// PRECONDITIONS: +/// The signal handlers should be restored before removing the reciever. +/// SAFETY: +/// Crash-tracking functions are not guaranteed to be reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function uses a compare_and_exchange on an atomic pointer. +/// If two simultaneous calls to this function occur, the first will win, +/// and the second will cleanup the redundant receiver. pub fn shutdown_receiver() -> anyhow::Result<()> { - let old_receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); anyhow::ensure!( - !old_receiver.is_null(), - "Error shutting down crash handler receiver: receiver did not already exist" + OLD_HANDLERS.load(SeqCst).is_null(), + "Crashtracker signal handlers should removed before shutting down the receiver" ); - let mut old_receiver = unsafe { Box::from_raw(old_receiver) }; - - old_receiver.kill()?; - old_receiver.wait()?; + let old_receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); + if !old_receiver.is_null() { + // Safety: This only comes from a `Box::into_raw`, and was checked for + // null above + let mut old_receiver = unsafe { Box::from_raw(old_receiver) }; + old_receiver.kill()?; + old_receiver.wait()?; + } Ok(()) } @@ -255,9 +295,30 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { Ok(()) } -// TODO, there is a small race condition here, but we can keep it small +/// Registers UNIX signal handlers to detect program crashes. +/// This function can be called multiple times and will be idempotent: it will +/// only create and set the handlers once. +/// However, note the restriction below: +/// PRECONDITIONS: +/// The signal handlers should be restored before removing the reciever. +/// SAFETY: +/// Crash-tracking functions are not guaranteed to be reentrant. +/// No other crash-handler functions should be called concurrently. +/// ATOMICITY: +/// This function uses a compare_and_exchange on an atomic pointer. +/// However, setting the crash handler itself is not an atomic operation +/// and hence it is possible that a concurrent operation could see partial +/// execution of this function. +/// If a crash occurs during execution of this function, it is possible that +/// the crash handler will have been registered, but the old signal handler +/// will not yet be stored. This would lead to unexpected behaviour for the +/// user. This should only matter if something crashes concurrently with +/// this function executing. pub fn register_crash_handlers(create_alt_stack: bool) -> anyhow::Result<()> { - anyhow::ensure!(OLD_HANDLERS.load(SeqCst).is_null()); + if !OLD_HANDLERS.load(SeqCst).is_null() { + return Ok(()); + } + unsafe { if create_alt_stack { set_alt_stack()?; @@ -267,7 +328,10 @@ pub fn register_crash_handlers(create_alt_stack: bool) -> anyhow::Result<()> { let boxed_ptr = Box::into_raw(Box::new(OldHandlers { sigbus, sigsegv })); let res = OLD_HANDLERS.compare_exchange(ptr::null_mut(), boxed_ptr, SeqCst, SeqCst); - anyhow::ensure!(res.is_ok()); + anyhow::ensure!( + res.is_ok(), + "TOCTTOU error in crashtracker::register_crash_handlers" + ); } Ok(()) } @@ -293,7 +357,7 @@ unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result Ok(old_handler) } -pub fn restore_old_handlers(leak: bool) -> anyhow::Result<()> { +pub fn restore_old_handlers(inside_signal_handler: bool) -> anyhow::Result<()> { let prev = OLD_HANDLERS.swap(ptr::null_mut(), SeqCst); anyhow::ensure!(!prev.is_null()); // Safety: The only nonnull pointer stored here comes from Box::into_raw() @@ -303,7 +367,7 @@ pub fn restore_old_handlers(leak: bool) -> anyhow::Result<()> { unsafe { signal::sigaction(signal::SIGSEGV, &prev.sigsegv)? }; // We want to avoid freeing memory inside the handler, so just leak it // This is fine since we're crashing anyway at this point - if leak { + if inside_signal_handler { Box::leak(prev); } Ok(()) @@ -312,6 +376,10 @@ pub fn restore_old_handlers(leak: bool) -> anyhow::Result<()> { /// Allocates a signal altstack, and puts a guard page at the end. /// Inspired by https://github.com/rust-lang/rust/pull/69969/files unsafe fn set_alt_stack() -> anyhow::Result<()> { + if ALTSTACK_INIT.load(SeqCst) { + return Ok(()); + } + let page_size = page_size::get(); let stackp = mmap( ptr::null_mut(), @@ -339,5 +407,6 @@ unsafe fn set_alt_stack() -> anyhow::Result<()> { }; let rval = sigaltstack(&stack, ptr::null_mut()); anyhow::ensure!(rval == 0, "sigaltstack failed {rval}"); + ALTSTACK_INIT.store(true, SeqCst); Ok(()) } From 89c87cbf5793124d1068cb78b3f10f048084d0ee Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Fri, 16 Feb 2024 17:34:55 -0500 Subject: [PATCH 10/16] Add context --- crashtracker/src/crash_handler.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index 0505f87c3..a67f5036c 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -247,14 +247,14 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { // Leak receiver, config, and metadata to avoid calling 'drop' during a crash let receiver = RECEIVER.swap(ptr::null_mut(), SeqCst); anyhow::ensure!(!receiver.is_null(), "No crashtracking receiver"); - let receiver = unsafe { receiver.as_mut().context("")? }; + let receiver = unsafe { receiver.as_mut().context("No crashtracking receiver")? }; let config = CONFIG.swap(ptr::null_mut(), SeqCst); anyhow::ensure!(!config.is_null(), "No crashtracking config"); - let (config, config_str) = unsafe { config.as_mut().context("")? }; + let (config, config_str) = unsafe { config.as_ref().context("No crashtracking receiver")? }; let metadata_ptr = METADATA.swap(ptr::null_mut(), SeqCst); - anyhow::ensure!(!metadata_ptr.is_null()); + anyhow::ensure!(!metadata_ptr.is_null(), "No crashtracking metadata"); let (_metadata, metadata_string) = unsafe { metadata_ptr.as_ref().context("metadata ptr")? }; let pipe = receiver @@ -359,7 +359,7 @@ unsafe fn register_signal_handler(signal_type: signal::Signal) -> anyhow::Result pub fn restore_old_handlers(inside_signal_handler: bool) -> anyhow::Result<()> { let prev = OLD_HANDLERS.swap(ptr::null_mut(), SeqCst); - anyhow::ensure!(!prev.is_null()); + anyhow::ensure!(!prev.is_null(), "No crashtracking previous signal handlers"); // Safety: The only nonnull pointer stored here comes from Box::into_raw() let prev = unsafe { Box::from_raw(prev) }; // Safety: The value restored here was returned from a previous sigaction call From d979539ba64b793da0c275cda008b79182af1ce9 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Fri, 16 Feb 2024 17:36:33 -0500 Subject: [PATCH 11/16] better comment --- crashtracker/src/crash_handler.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index a67f5036c..fe2fab025 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -291,7 +291,8 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { // for input from the parent, while the parent waits for the child to exit. // TODO, use a polling mechanism that could recover from a crashing child receiver.wait()?; - // Calling "free" in a signal handler is dangerous, so don't do that. + // Calling "free" in a signal handler is dangerous, so we just leak the + // objects we took (receiver, metadata, config, etc) Ok(()) } From 900d98308b304669f25620df7d2ea81e439c5fd0 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Fri, 16 Feb 2024 17:37:49 -0500 Subject: [PATCH 12/16] Better includes --- crashtracker/src/api.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index bc16a74fd..45ab98c0d 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -3,13 +3,10 @@ #![cfg(unix)] use crate::{ - crash_handler::{ensure_receiver, register_crash_handlers}, - update_config, update_metadata, -}; - -use super::{ counters::reset_counters, + crash_handler::{ensure_receiver, register_crash_handlers}, crash_handler::{restore_old_handlers, shutdown_receiver, update_reciever_after_fork}, + update_config, update_metadata, }; use ddcommon::tag::Tag; use ddcommon::Endpoint; From 2f9eee7f036368e7f6c252d23b87ec7ff791f3f0 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Fri, 16 Feb 2024 17:38:27 -0500 Subject: [PATCH 13/16] turn the test back off --- crashtracker/src/api.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index 45ab98c0d..c2f7a38ac 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -175,7 +175,7 @@ pub fn init( // mkdir /tmp/crashreports // look in /tmp/crashreports for the crash reports and output files // Commented out since `ignore` doesn't work in CI. -#[test] +//#[test] fn test_crash() { use crate::begin_profiling_op; use chrono::Utc; From bd160687817487fa5c5c4e4626e0cea8e9918915 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Tue, 20 Feb 2024 19:32:30 -0500 Subject: [PATCH 14/16] typo --- crashtracker/src/crash_handler.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crashtracker/src/crash_handler.rs b/crashtracker/src/crash_handler.rs index fe2fab025..c87c110ad 100644 --- a/crashtracker/src/crash_handler.rs +++ b/crashtracker/src/crash_handler.rs @@ -159,7 +159,7 @@ pub fn ensure_receiver(config: CrashtrackerConfiguration) -> anyhow::Result<()> /// This function uses a compare_and_exchange on an atomic pointer. /// If two simultaneous calls to this function occur, the first will win, /// and the second will cleanup the redundant receiver. -pub fn update_reciever_after_fork(config: CrashtrackerConfiguration) -> anyhow::Result<()> { +pub fn update_receiver_after_fork(config: CrashtrackerConfiguration) -> anyhow::Result<()> { let new_receiver = Box::into_raw(Box::new(make_receiver(config)?)); let old_receiver: *mut std::process::Child = RECEIVER.swap(new_receiver, SeqCst); anyhow::ensure!( @@ -180,7 +180,7 @@ pub fn update_reciever_after_fork(config: CrashtrackerConfiguration) -> anyhow:: /// Shuts down a receiver, /// PRECONDITIONS: -/// The signal handlers should be restored before removing the reciever. +/// The signal handlers should be restored before removing the receiver. /// SAFETY: /// Crash-tracking functions are not guaranteed to be reentrant. /// No other crash-handler functions should be called concurrently. @@ -301,7 +301,7 @@ fn handle_posix_signal_impl(signum: i32) -> anyhow::Result<()> { /// only create and set the handlers once. /// However, note the restriction below: /// PRECONDITIONS: -/// The signal handlers should be restored before removing the reciever. +/// The signal handlers should be restored before removing the receiver. /// SAFETY: /// Crash-tracking functions are not guaranteed to be reentrant. /// No other crash-handler functions should be called concurrently. From 146e9e0b3f23b96db99788de00e2a28bd1951be0 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Tue, 20 Feb 2024 19:36:26 -0500 Subject: [PATCH 15/16] PR comments --- profiling-ffi/src/crashtracker.rs | 46 ++----------------------------- 1 file changed, 3 insertions(+), 43 deletions(-) diff --git a/profiling-ffi/src/crashtracker.rs b/profiling-ffi/src/crashtracker.rs index 336c07c06..5e22e9425 100644 --- a/profiling-ffi/src/crashtracker.rs +++ b/profiling-ffi/src/crashtracker.rs @@ -89,7 +89,7 @@ pub unsafe extern "C" fn ddog_prof_crashtracker_begin_profiling_op( match datadog_crashtracker::begin_profiling_op(op) { Ok(_) => ProfileResult::Ok(true), Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_init failed"), + err.context("ddog_prof_crashtracker_begin_profiling_op failed"), )), } } @@ -102,7 +102,7 @@ pub unsafe extern "C" fn ddog_prof_crashtracker_end_profiling_op( match datadog_crashtracker::end_profiling_op(op) { Ok(_) => ProfileResult::Ok(true), Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_init failed"), + err.context("ddog_prof_crashtracker_end_profiling_op failed"), )), } } @@ -113,51 +113,11 @@ pub unsafe extern "C" fn ddog_prof_crashtracker_shutdown() -> ProfileResult { match datadog_crashtracker::shutdown_crash_handler() { Ok(_) => ProfileResult::Ok(true), Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_init failed"), + err.context("ddog_prof_crashtracker_shutdown failed"), )), } } -#[no_mangle] -#[must_use] -pub unsafe extern "C" fn ddog_prof_crashtracker_update_config( - config: CrashtrackerConfiguration, -) -> ProfileResult { - match ddog_prof_crashtracker_update_config_impl(config) { - Ok(_) => ProfileResult::Ok(true), - Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_update_config failed"), - )), - } -} - -unsafe fn ddog_prof_crashtracker_update_config_impl( - config: CrashtrackerConfiguration, -) -> anyhow::Result<()> { - let config = config.try_into()?; - datadog_crashtracker::update_config(config) -} - -#[no_mangle] -#[must_use] -pub unsafe extern "C" fn ddog_prof_crashtracker_update_metadata( - metadata: CrashtrackerMetadata, -) -> ProfileResult { - match ddog_prof_crashtracker_update_metadata_impl(metadata) { - Ok(_) => ProfileResult::Ok(true), - Err(err) => ProfileResult::Err(Error::from( - err.context("ddog_prof_crashtracker_update_metadata failed"), - )), - } -} - -unsafe fn ddog_prof_crashtracker_update_metadata_impl( - metadata: CrashtrackerMetadata, -) -> anyhow::Result<()> { - let metadata = metadata.try_into()?; - datadog_crashtracker::update_metadata(metadata) -} - #[no_mangle] #[must_use] pub unsafe extern "C" fn ddog_prof_crashtracker_update_on_fork( From e98178f35544513a372c59095a1227533fb3e550 Mon Sep 17 00:00:00 2001 From: Daniel Schwartz-Narbonne Date: Tue, 20 Feb 2024 19:40:40 -0500 Subject: [PATCH 16/16] forgot to update name --- crashtracker/src/api.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crashtracker/src/api.rs b/crashtracker/src/api.rs index c2f7a38ac..d8cd9848e 100644 --- a/crashtracker/src/api.rs +++ b/crashtracker/src/api.rs @@ -5,7 +5,7 @@ use crate::{ counters::reset_counters, crash_handler::{ensure_receiver, register_crash_handlers}, - crash_handler::{restore_old_handlers, shutdown_receiver, update_reciever_after_fork}, + crash_handler::{restore_old_handlers, shutdown_receiver, update_receiver_after_fork}, update_config, update_metadata, }; use ddcommon::tag::Tag; @@ -136,7 +136,7 @@ pub fn on_fork( update_config(config.clone())?; // See function level comment about why we do this. - update_reciever_after_fork(config)?; + update_receiver_after_fork(config)?; Ok(()) }