Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add more siginfo_t context #726

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 6 additions & 2 deletions bin_tests/tests/crashtracker_bin_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,13 @@ fn test_crash_tracking_bin(crash_tracking_receiver_profile: BuildProfile, mode:
);
assert_eq!(
serde_json::json!({
"signum": 11,
"signame": "SIGSEGV",
"code": 1,
"codename": "SEGV_MAPERR",
"faulting_address": 0,
"pid": 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only collect the fields that are relevent for the given signal https://man7.org/linux/man-pages/man2/sigaction.2.html

"si_addr": 0,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be in hex

"signame": "SIGSEGV",
"signum": 11,
}),
crash_payload["siginfo"]
);
Expand Down
14 changes: 13 additions & 1 deletion crashtracker-ffi/src/crash_info/datatypes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,19 +229,31 @@ impl<'a> TryFrom<&StackFrame<'a>> for datadog_crashtracker::StackFrame {
pub struct SigInfo<'a> {
pub signum: u64,
pub signame: CharSlice<'a>,
pub pid: i32,
pub code: i32,
pub code_name: CharSlice<'a>,
pub si_addr: usize,
}

impl<'a> TryFrom<SigInfo<'a>> for datadog_crashtracker::SigInfo {
type Error = anyhow::Error;

fn try_from(value: SigInfo<'a>) -> Result<Self, Self::Error> {
let signum = value.signum;
let signame = option_from_char_slice(value.signame)?;
let signame = value.signame.to_string();
let code = value.code;
let codename = value.code_name.to_string();
let pid = value.pid;
let faulting_address = None; // TODO: Expose this to FFI
let si_addr = value.si_addr;
Ok(Self {
signum,
signame,
pid,
code,
codename,
faulting_address,
si_addr,
})
}
}
Expand Down
1 change: 1 addition & 0 deletions crashtracker/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ rand = "0.8.5"

[dev-dependencies]
tempfile = { version = "3.3" }
regex = "1.5"
19 changes: 5 additions & 14 deletions crashtracker/src/collector/crash_handler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,10 @@ pub fn configure_receiver(config: CrashtrackerReceiverConfig) {
}
}

extern "C" fn handle_posix_sigaction(signum: i32, sig_info: *mut siginfo_t, ucontext: *mut c_void) {
extern "C" fn handle_posix_sigaction(signum: i32, siginfo: *mut siginfo_t, ucontext: *mut c_void) {
// Handle the signal. Note this has a guard to ensure that we only generate
// one crash report per process.
let _ = handle_posix_signal_impl(signum, sig_info);
let _ = handle_posix_signal_impl(siginfo);

// Once we've handled the signal, chain to any previous handlers.
// SAFETY: This was created by [register_crash_handlers]. There is a tiny
Expand Down Expand Up @@ -430,11 +430,11 @@ extern "C" fn handle_posix_sigaction(signum: i32, sig_info: *mut siginfo_t, ucon
}
SigHandler::SigIgn => (), // Return and ignore the signal.
SigHandler::Handler(f) => f(signum),
SigHandler::SigAction(f) => f(signum, sig_info, ucontext),
SigHandler::SigAction(f) => f(signum, siginfo, ucontext),
};
}

fn handle_posix_signal_impl(signum: i32, sig_info: *mut siginfo_t) -> anyhow::Result<()> {
fn handle_posix_signal_impl(siginfo: *mut siginfo_t) -> anyhow::Result<()> {
// If this is a SIGSEGV signal, it could be called due to a stack overflow. In that case, since
// this signal allocates to the stack and cannot guarantee it is running without SA_NODEFER, it
// is possible that we will re-emit the signal. Contemporary unices handle this just fine (no
Expand Down Expand Up @@ -476,14 +476,6 @@ fn handle_posix_signal_impl(signum: i32, sig_info: *mut siginfo_t) -> anyhow::Re
let timeout_ms = config.timeout_ms;
let start_time = Instant::now(); // This is the time at which the signal was received

// Derive the faulting address from `sig_info`
let faulting_address: Option<usize> =
if !sig_info.is_null() && (signum == libc::SIGSEGV || signum == libc::SIGBUS) {
unsafe { Some((*sig_info).si_addr() as usize) }
} else {
None
};

// During the execution of this signal handler, block ALL other signals, especially because we
// cannot control whether or not we run with SA_NODEFER (crashtracker might have been chained).
// The especially problematic signals are SIGCHLD and SIGPIPE, which are possibly delivered due
Expand All @@ -509,8 +501,7 @@ fn handle_posix_signal_impl(signum: i32, sig_info: *mut siginfo_t) -> anyhow::Re
config,
config_str,
metadata_string,
signum,
faulting_address,
unsafe { *siginfo },
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Calling by value not reference?

);

let _ = unix_stream.flush();
Expand Down
131 changes: 108 additions & 23 deletions crashtracker/src/collector/emitters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: Apache-2.0

use crate::collector::counters::emit_counters;
use crate::collector::siginfo_strings;
use crate::collector::spans::emit_spans;
use crate::collector::spans::emit_traces;
use crate::shared::constants::*;
Expand Down Expand Up @@ -100,12 +101,11 @@ pub(crate) fn emit_crashreport(
config: &CrashtrackerConfiguration,
config_str: &str,
metadata_string: &str,
signum: i32,
faulting_address: Option<usize>,
siginfo: libc::siginfo_t,
) -> anyhow::Result<()> {
emit_metadata(pipe, metadata_string)?;
emit_config(pipe, config_str)?;
emit_siginfo(pipe, signum, faulting_address)?;
emit_siginfo(pipe, siginfo)?;
emit_procinfo(pipe)?;
pipe.flush()?;
emit_counters(pipe)?;
Expand Down Expand Up @@ -164,28 +164,54 @@ fn emit_proc_self_maps(w: &mut impl Write) -> anyhow::Result<()> {
Ok(())
}

fn emit_siginfo(
w: &mut impl Write,
signum: i32,
faulting_address: Option<usize>,
) -> anyhow::Result<()> {
let signame = if signum == libc::SIGSEGV {
"SIGSEGV"
} else if signum == libc::SIGBUS {
"SIGBUS"
} else {
"UNKNOWN"
};
/// Serialize a siginfo_t to the given handle.
///
/// This function eagerly emits various parts of the siginfo_t, even though some components are
/// only circumstantially useful. The kernel generally provides sane defaults to unused
/// members, and for downstream consumers it is probably easier to present a consistent keyspace
/// than to conditionally emit fields.
///
/// In particular, note that `si_addr` is a repeat of `faulting_address`, the latter of which is
/// emitted only for `SIGSEGV` signals. This data layout should be considered unstable until
/// canonized in a data format RFC.
///
/// PRECONDITIONS:
/// This function assumes that the crash-tracker is initialized.
/// SAFETY:
/// Crash-tracking functions are not reentrant.
/// No other crash-handler functions should be called concurrently.
/// ATOMICITY:
/// This function is not atomic. A crash during its execution may lead to
/// unexpected crash-handling behaviour.
/// SIGNAL SAFETY:
/// This function is careful to only write to the handle, without doing any
/// unnecessary mutexes or memory allocation.
pub(crate) fn emit_siginfo(w: &mut impl Write, siginfo: libc::siginfo_t) -> anyhow::Result<()> {
let signame = siginfo_strings::get_signal_name(&siginfo);
let codename = siginfo_strings::get_code_name(&siginfo);
let si_addr = unsafe { siginfo.si_addr() } as usize;
let si_signo = siginfo.si_signo;
let si_pid = unsafe { siginfo.si_pid() };
let si_code = siginfo.si_code;

writeln!(w, "{DD_CRASHTRACK_BEGIN_SIGINFO}")?;
if let Some(addr) = faulting_address {
writeln!(
w,
"{{\"signum\": {signum}, \"signame\": \"{signame}\", \"faulting_address\": {addr}}}"
)?;
} else {
writeln!(w, "{{\"signum\": {signum}, \"signame\": \"{signame}\"}}")?;
};
write!(w, "{{")?;
write!(w, "\"signum\": {si_signo}, ")?;
write!(w, "\"signame\": \"{signame}\", ")?;
write!(w, "\"pid\": {si_pid}, ")?;
write!(w, "\"code\": {si_code}, ")?;
write!(w, "\"codename\": \"{codename}\", ")?;

// For now, we'll double-emit data which is backed by `si_addr` because the telemetry backend
// won't do anything fancy with this payload, and `faulting_address` has significance to
// developers who are querying for specific faults. However, we should probably decide how
// this gets canonized and which component is responsible.
// For now, double-ship the address.
if siginfo.si_signo == libc::SIGSEGV {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not SIGBUS?

write!(w, "\"faulting_address\": {si_addr}, ")?;
}
write!(w, "\"si_addr\": {si_addr}")?; // last field, no trailing comma
writeln!(w, "}}")?;
writeln!(w, "{DD_CRASHTRACK_END_SIGINFO}")?;
Ok(())
}
Expand Down Expand Up @@ -234,3 +260,62 @@ fn emit_text_file(w: &mut impl Write, path: &str) -> anyhow::Result<()> {
w.flush()?;
Ok(())
}

#[cfg(test)]
struct JsonCaptureWriter {
buffer: Vec<u8>,
}

#[cfg(test)]
impl JsonCaptureWriter {
fn new() -> Self {
JsonCaptureWriter { buffer: Vec::new() }
}

pub fn get_json(&self) -> String {
use regex::Regex;
let output = String::from_utf8_lossy(&self.buffer);
let re = Regex::new(r"DD_CRASHTRACK_\w+").unwrap();
re.replace_all(&output, "").trim().to_string()
}
}

#[cfg(test)]
impl Write for JsonCaptureWriter {
fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
self.buffer.write(buf)
}

fn flush(&mut self) -> std::io::Result<()> {
// NOP, but needed for interface
Ok(())
}
}

#[test]
fn test_sigaction_siginfo() -> anyhow::Result<()> {
// This is a simple test to make sure that a valid SigInfo can be created from a
// libc::siginfo_t on the current platform.
use crate::SigInfo;
let mut siginfo: libc::siginfo_t = unsafe { std::mem::zeroed() };
siginfo.si_signo = libc::SIGSEGV;
siginfo.si_code = 1;

let mut writer = JsonCaptureWriter::new();
emit_siginfo(&mut writer, siginfo)?;
let siginfo_json_str = writer.get_json();

// Is it valid JSON?
let _: serde_json::Value = serde_json::from_str(&siginfo_json_str)?;

// Is it a valid SigInfo?
let siginfo: SigInfo = serde_json::from_str(&siginfo_json_str)?;

// Sanity check for the fields
assert_eq!(siginfo.signum, libc::SIGSEGV as u64);
assert_eq!(siginfo.signame, "SIGSEGV");
assert_eq!(siginfo.pid, 0);
assert_eq!(siginfo.code, 1);
assert_eq!(siginfo.codename, "SEGV_MAPERR");
Ok(())
}
1 change: 1 addition & 0 deletions crashtracker/src/collector/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mod counters;
mod crash_handler;
mod emitters;
mod saguard;
mod siginfo_strings;
mod spans;

pub use api::*;
Expand Down
110 changes: 110 additions & 0 deletions crashtracker/src/collector/siginfo_strings.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/
// SPDX-License-Identifier: Apache-2.0
#![cfg(unix)]

pub fn get_signal_name(siginfo: &libc::siginfo_t) -> &'static str {
let signal = siginfo.si_signo;
match signal {
libc::SIGSEGV => "SIGSEGV",
libc::SIGBUS => "SIGBUS",
libc::SIGILL => "SIGILL",
libc::SIGABRT => "SIGABRT",
libc::SIGFPE => "SIGFPE",
libc::SIGTRAP => "SIGTRAP",
libc::SIGSYS => "SIGSYS",
_ => "UNKNOWN",
}
}

// These are defined in siginfo.h
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these stable across OS/Architecture?

// They are only derived here because there doesn't appear to be a crate that provides them in a
// comparable way. There's no other reason for this, so it can be replaced as soon as a better
// solution is found.
// (this only copies the most common, actionable values)
pub mod siginfo_code {
pub const SI_USER: i32 = 0;
pub const SI_KERNEL: i32 = 0x80;
pub const SI_QUEUE: i32 = -1;
pub const SI_TIMER: i32 = -2;
pub const SI_TKILL: i32 = -6;

pub mod ill {
pub const ILLOPC: i32 = 1;
pub const ILLOPN: i32 = 2;
pub const ILLADR: i32 = 3;
pub const ILLTRP: i32 = 4;
pub const PRVOPC: i32 = 5;
pub const PRVREG: i32 = 6;
pub const COPROC: i32 = 7;
pub const BADSTK: i32 = 8;
}

pub mod segv {
pub const MAPERR: i32 = 1;
pub const ACCERR: i32 = 2;
}

pub mod bus {
pub const ADRALN: i32 = 1;
pub const ADRERR: i32 = 2;
pub const OBJERR: i32 = 3;
}

pub mod trap {
pub const BRKPT: i32 = 1;
pub const TRACE: i32 = 2;
}

pub mod sys {
pub const SECCOMP: i32 = 1;
}
}

pub fn get_code_name(siginfo: &libc::siginfo_t) -> &'static str {
let signo = siginfo.si_signo;

// Strip out the high byte for PTRACE_EVENT_* flags (defensive coding)
let code = siginfo.si_code & 0x7f;

match signo {
libc::SIGILL => match code {
siginfo_code::ill::ILLOPC => "ILL_ILLOPC",
siginfo_code::ill::ILLOPN => "ILL_ILLOPN",
siginfo_code::ill::ILLADR => "ILL_ILLADR",
siginfo_code::ill::ILLTRP => "ILL_ILLTRP",
siginfo_code::ill::PRVOPC => "ILL_PRVOPC",
siginfo_code::ill::PRVREG => "ILL_PRVREG",
siginfo_code::ill::COPROC => "ILL_COPROC",
siginfo_code::ill::BADSTK => "ILL_BADSTK",
_ => "UNKNOWN_SIGILL",
},
libc::SIGSEGV => match code {
siginfo_code::segv::MAPERR => "SEGV_MAPERR",
siginfo_code::segv::ACCERR => "SEGV_ACCERR",
_ => "UNKNOWN_SIGSEGV",
},
libc::SIGBUS => match code {
siginfo_code::bus::ADRALN => "BUS_ADRALN",
siginfo_code::bus::ADRERR => "BUS_ADRERR",
siginfo_code::bus::OBJERR => "BUS_OBJERR",
_ => "UNKNOWN_SIGBUS",
},
libc::SIGTRAP => match code {
siginfo_code::trap::BRKPT => "TRAP_BRKPT",
siginfo_code::trap::TRACE => "TRAP_TRACE",
_ => "UNKNOWN_SIGTRAP",
},
libc::SIGSYS => match code {
siginfo_code::sys::SECCOMP => "SYS_SECCOMP",
_ => "UNKNOWN_SIGSYS",
},
_ => match code {
siginfo_code::SI_USER => "SI_USER",
siginfo_code::SI_KERNEL => "SI_KERNEL",
siginfo_code::SI_QUEUE => "SI_QUEUE",
siginfo_code::SI_TIMER => "SI_TIMER",
siginfo_code::SI_TKILL => "SI_TKILL",
_ => "UNKNOWN_GENERAL",
},
}
}
Loading
Loading