-
Notifications
You must be signed in to change notification settings - Fork 381
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add unsafe api calls checker to track down issues such as #4195
This checker is used to detect accidental thread scheduling switching points happening during profiling sampling. See the bigger comment in unsafe_api_calls_check.h . I was able to check that this checker correctly triggers for the bug in #4195, and also the bug I'm going to fix next, which is the use of `rb_hash_lookup` in the otel context reading code.
- Loading branch information
Showing
6 changed files
with
159 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
ext/datadog_profiling_native_extension/unsafe_api_calls_check.c
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
#include <ruby.h> | ||
#include <ruby/debug.h> | ||
#include <stdbool.h> | ||
|
||
#include "datadog_ruby_common.h" | ||
#include "unsafe_api_calls_check.h" | ||
#include "extconf.h" | ||
|
||
// Flag set by debug_enter_unsafe_context() and cleared by debug_leave_unsafe_context(). | ||
// check_for_unsafe_api_calls() reads it to decide whether to abort the VM. | ||
static bool inside_unsafe_context = false; | ||
|
||
// Handle returned by rb_postponed_job_preregister() in unsafe_api_calls_check_init(). | ||
// Only exists when NO_POSTPONED_TRIGGER is not defined; otherwise the job is registered on every use. | ||
#ifndef NO_POSTPONED_TRIGGER | ||
static rb_postponed_job_handle_t check_for_unsafe_api_calls_handle; | ||
#endif | ||
|
||
// Postponed job callback (defined at the bottom of this file); its argument is unused. | ||
static void check_for_unsafe_api_calls(DDTRACE_UNUSED void *_unused); | ||
|
||
void unsafe_api_calls_check_init(void) { | ||
#ifndef NO_POSTPONED_TRIGGER | ||
int unused_flags = 0; | ||
|
||
check_for_unsafe_api_calls_handle = rb_postponed_job_preregister(unused_flags, check_for_unsafe_api_calls, NULL); | ||
|
||
if (check_for_unsafe_api_calls_handle == POSTPONED_JOB_HANDLE_INVALID) { | ||
rb_raise(rb_eRuntimeError, "Failed to register check_for_unsafe_api_calls_handle postponed job (got POSTPONED_JOB_HANDLE_INVALID)"); | ||
} | ||
#endif | ||
} | ||
|
||
void debug_enter_unsafe_context(void) { | ||
inside_unsafe_context = true; | ||
|
||
#ifndef NO_POSTPONED_TRIGGER | ||
rb_postponed_job_trigger(check_for_unsafe_api_calls_handle); | ||
#else | ||
rb_postponed_job_register(0, check_for_unsafe_api_calls, NULL); | ||
#endif | ||
} | ||
|
||
void debug_leave_unsafe_context(void) { | ||
inside_unsafe_context = false; | ||
} | ||
|
||
// Postponed job callback used by the checker; the argument is ignored.
//
// Does nothing when the inside_unsafe_context flag is unset. If the flag is set, this callback ran in the middle of
// a region marked with debug_enter_unsafe_context(), so we report it via rb_bug().
static void check_for_unsafe_api_calls(DDTRACE_UNUSED void *_unused) {
  if (!inside_unsafe_context) return;

  rb_bug(
    "Datadog Ruby profiler detected callback nested inside sample. Please report this at https://github.com/datadog/dd-trace-rb/blob/master/CONTRIBUTING.md#found-a-bug"
  );
}
25 changes: 25 additions & 0 deletions
25
ext/datadog_profiling_native_extension/unsafe_api_calls_check.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
#pragma once | ||
|
||
// This checker is used to detect accidental thread scheduling switching points happening during profiling sampling. | ||
// | ||
// Specifically, when the profiler is sampling, we're never supposed to call into Ruby code (e.g. methods | ||
// implemented using Ruby code) or allocate Ruby objects. | ||
// That's because those events introduce thread switch points, and really we don't want the VM switching between threads | ||
// in the middle of the profiler sampling. | ||
// This includes raising exceptions, unless we're trying to stop the profiler, and even then we must be careful. | ||
// | ||
// The above is especially true in situations such as GC profiling or allocation/heap profiling, as in those situations | ||
// we can even crash the Ruby VM if we switch away at the wrong time. | ||
// | ||
// The below APIs can be used to detect these situations. They work by relying on the following observation: | ||
// in most (all?) thread switch points, Ruby will check for interrupts and run the postponed jobs. | ||
// | ||
// Thus, if we set a flag while we're sampling (inside_unsafe_context), trigger the postponed job, and then only unset | ||
// the flag after sampling, the correct thing to happen is that the postponed job should never see the flag. | ||
// | ||
// If, however, we have a bug and there's a thread switch point, our postponed job will see the flag and immediately | ||
// stop the Ruby VM before further damage happens (and hopefully giving us a stack trace clearly pointing to the culprit). | ||
|
||
// Sets up the checker's postponed job (when NO_POSTPONED_TRIGGER is not defined); raises a RuntimeError if | ||
// Ruby returns an invalid postponed job handle. | ||
void unsafe_api_calls_check_init(void); | ||
// Sets the inside_unsafe_context flag and triggers the checker's postponed job. | ||
void debug_enter_unsafe_context(void); | ||
// Clears the inside_unsafe_context flag. | ||
void debug_leave_unsafe_context(void); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters