Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(profiling): exception sampling for stack v2 #10907

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ class StackRenderer : public RendererInterface
virtual void render_cpu_time(microsecond_t cpu_time_us) override;
virtual void render_stack_end() override;
virtual bool is_valid() override;

private:
void maybe_collect_exception_sample(PyThreadState* tstate);
};

} // namespace Datadog
168 changes: 168 additions & 0 deletions ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
Original file line number Diff line number Diff line change
@@ -1,8 +1,174 @@
#include "stack_renderer.hpp"

#include "utf8_validate.hpp"

#include <Python.h>

using namespace Datadog;

// Builds the qualified name of an exception type as
// "<exc_type.__module__>.<exc_type.__name__>".
//
// Returns an empty string when `exc_type` is null, when either attribute
// cannot be fetched, or when either attribute cannot be exposed as UTF-8.
// Callers treat the empty string as "no usable name".
//
// NOTE(review): a failing PyObject_GetAttrString()/PyUnicode_AsUTF8() may leave
// a Python error indicator set; this helper does not clear it — confirm whether
// the caller should.
std::string
get_exc_type_name(PyObject* exc_type)
{
    if (exc_type == nullptr) {
        return {};
    }

    // Both lookups hand back new references that are released below.
    PyObject* module_attr = PyObject_GetAttrString(exc_type, "__module__");
    if (module_attr == nullptr) {
        return {};
    }
    PyObject* name_attr = PyObject_GetAttrString(exc_type, "__name__");
    if (name_attr == nullptr) {
        Py_DECREF(module_attr);
        return {};
    }

    // The UTF-8 buffers are owned by the unicode objects, so the result has to
    // be copied into `result` before the references are dropped.
    std::string result;
    const char* module_str = PyUnicode_AsUTF8(module_attr);
    const char* name_str = PyUnicode_AsUTF8(name_attr);
    if (module_str != nullptr && name_str != nullptr) {
        result.append(module_str).append(".").append(name_str);
    }
    // On conversion failure `result` stays empty. The previous code returned
    // `nullptr` here, which constructs a std::string from a null pointer
    // (undefined behaviour).

    Py_DECREF(module_attr);
    Py_DECREF(name_attr);

    return result;
}

// Walks the traceback chain starting at `tb` (following `tb_next`) and pushes
// one frame per traceback entry onto `sample` via ddup_push_frame().
//
// Entries whose frame, code object, filename or function name is missing, or
// whose strings cannot be exposed as UTF-8, are skipped silently.
//
// NOTE(review): the line number pushed is the code object's `co_firstlineno`
// (the first line of the function), not the line that raised — confirm this
// is the intended value.
void
push_exception_frames(Sample* sample, PyTracebackObject* tb)
{
    for (; tb != nullptr; tb = tb->tb_next) {
        PyFrameObject* frame = tb->tb_frame;
        if (frame == nullptr) {
            continue;
        }

#if PY_VERSION_HEX >= 0x030b0000
        // Since 3.11, members of PyFrameObject are removed from the public C API.
        // PyFrame_GetCode() returns a new reference, released at the end of the iteration.
        PyCodeObject* code = PyFrame_GetCode(frame);
#else
        // Direct member access; no new reference is taken, so nothing to release.
        PyCodeObject* code = frame->f_code;
#endif
        if (code == nullptr) {
            continue;
        }

        PyObject* filename = code->co_filename;
        PyObject* name = code->co_name;
        if (filename != nullptr && name != nullptr) {
            const char* filename_str = PyUnicode_AsUTF8(filename);
            const char* name_str = PyUnicode_AsUTF8(name);
            if (filename_str != nullptr && name_str != nullptr) {
                ddup_push_frame(sample, name_str, filename_str, 0, code->co_firstlineno);
            }
        }

#if PY_VERSION_HEX >= 0x030b0000
        Py_DECREF(code);
#endif
    }
}

// Emits a dedicated exception sample for the exception currently being handled
// on `tstate`, if there is one.
//
// The sample carries the current thread info and timestamp from `thread_state`,
// the qualified exception type name, and one frame per traceback entry. Nothing
// is emitted when there is no active exception, when the exception has no
// traceback object, when the type name cannot be resolved, or when a sample
// cannot be allocated.
//
// NOTE(review): the original author was unsure whether reference counting is
// appropriate here at all, since `tstate` may be a copy of the thread state
// read from another thread (process_vm_readv() or equivalent) — confirm before
// relying on the refcount operations below.
void
StackRenderer::maybe_collect_exception_sample(PyThreadState* tstate)
{
    _PyErr_StackItem* exc_info = _PyErr_GetTopmostException(tstate);
    if (exc_info == nullptr) {
        return;
    }

    PyObject* exc_value = exc_info->exc_value;
    if (exc_value == nullptr || exc_value == Py_None) {
        return;
    }

    // Equivalent to the Python expression `type(exc_value)`.
    // Py_TYPE() yields a *borrowed* reference, so it must not be Py_DECREF'ed.
    PyTypeObject* exc_type = Py_TYPE(exc_value);
    if (exc_type == nullptr) {
        return;
    }

    // Equivalent to the Python expression `exc_value.__traceback__`.
    // PyObject_GetAttrString() returns a *new* reference that is released on
    // every path below.
    PyObject* exc_traceback = PyObject_GetAttrString(exc_value, "__traceback__");
    if (exc_traceback == nullptr) {
        PyErr_Clear(); // Clear any error set by PyObject_GetAttrString
        return;
    }

    // `__traceback__` is None when the exception has no traceback; anything
    // that is not a real traceback object must not be cast to
    // PyTracebackObject* and walked.
    if (!PyTraceBack_Check(exc_traceback)) {
        Py_DECREF(exc_traceback);
        return;
    }

    // Format exc_type as exc_type.__module__ + '.' + exc_type.__name__
    const std::string exc_type_name = get_exc_type_name(reinterpret_cast<PyObject*>(exc_type));
    if (exc_type_name.empty()) {
        Py_DECREF(exc_traceback);
        return;
    }

    // Now we have the exception type name, we can start building the exception sample
    Sample* exc_sample = ddup_start_sample();
    if (exc_sample == nullptr) {
        std::cerr << "Failed to create an exception sample." << std::endl;
        Py_DECREF(exc_traceback);
        return;
    }

    ddup_push_monotonic_ns(exc_sample, thread_state.now_time_ns);
    ddup_push_threadinfo(exc_sample,
                         static_cast<int64_t>(thread_state.id),
                         static_cast<int64_t>(thread_state.native_id),
                         thread_state.name);
    ddup_push_exceptioninfo(exc_sample, exc_type_name, 1);
    push_exception_frames(exc_sample, reinterpret_cast<PyTracebackObject*>(exc_traceback));
    ddup_flush_sample(exc_sample);
    ddup_drop_sample(exc_sample);

    Py_DECREF(exc_traceback);
}

void
StackRenderer::render_message(std::string_view msg)
{
Expand Down Expand Up @@ -52,6 +218,8 @@ StackRenderer::render_thread_begin(PyThreadState* tstate,
// Finalize the thread information we have
ddup_push_threadinfo(sample, static_cast<int64_t>(thread_id), static_cast<int64_t>(native_id), name);
ddup_push_walltime(sample, thread_state.wall_time_ns, 1);

maybe_collect_exception_sample(tstate);
}

void
Expand Down
30 changes: 30 additions & 0 deletions tests/profiling_v2/collector/test_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,33 @@ def test_push_non_web_span():
# trace_endpoint is not set for non-web spans
),
)


@pytest.mark.subprocess(env=dict(DD_PROFILING_STACK_V2_ENABLED="true"))
def test_exception_collection():
    # Runs in a subprocess with stack v2 enabled: holds a handled ValueError
    # open for one second while the stack collector is active, uploads the
    # profile, and checks that at least one sample carries the
    # "exception type" label.
    import os
    import time

    from ddtrace.internal.datadog.profiling import ddup
    from ddtrace.profiling.collector import stack
    from tests.profiling.collector import pprof_utils

    service_name = "test_exception_collection"
    profile_prefix = "/tmp/" + service_name
    # The profile is read back from "<prefix>.<pid>".
    profile_path = ".".join([profile_prefix, str(os.getpid())])

    assert ddup.is_available
    ddup.config(
        env="test",
        service=service_name,
        version="my_version",
        output_filename=profile_prefix,
    )
    ddup.start()

    with stack.StackCollector(None, ignore_profiler=True):
        try:
            raise ValueError("hello")
        except Exception:
            # Stay inside the handler so the sampler can observe the active exception.
            time.sleep(1)

    ddup.upload()

    parsed_profile = pprof_utils.parse_profile(profile_path)
    exception_samples = pprof_utils.get_samples_with_label_key(parsed_profile, "exception type")
    assert len(exception_samples) > 0