diff --git a/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp
index 378caf769ef..5c290ca5d33 100644
--- a/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp
+++ b/ddtrace/internal/datadog/profiling/stack_v2/include/stack_renderer.hpp
@@ -48,6 +48,10 @@ class StackRenderer : public RendererInterface
     virtual void render_cpu_time(microsecond_t cpu_time_us) override;
     virtual void render_stack_end() override;
     virtual bool is_valid() override;
+
+  private:
+    // Emits an extra sample for the exception the thread is handling, if any.
+    void maybe_collect_exception_sample(PyThreadState* tstate);
 };
 
 } // namespace Datadog
diff --git a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
index 9ee7a3813d0..8d139386d8a 100644
--- a/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
+++ b/ddtrace/internal/datadog/profiling/stack_v2/src/stack_renderer.cpp
@@ -1,8 +1,150 @@
 #include "stack_renderer.hpp"
+
 #include "utf8_validate.hpp"
 
+#include <iostream>
+#include <string>
+
 using namespace Datadog;
 
+// Returns "<module>.<name>" for the given exception type, or an empty string
+// if either attribute cannot be read as UTF-8 text.
+std::string
+get_exc_type_name(PyObject* exc_type)
+{
+    if (exc_type == nullptr) {
+        return {};
+    }
+    PyObject* module_attr = PyObject_GetAttrString(exc_type, "__module__");
+    if (module_attr == nullptr) {
+        return {};
+    }
+    PyObject* name_attr = PyObject_GetAttrString(exc_type, "__name__");
+    if (name_attr == nullptr) {
+        Py_DECREF(module_attr);
+        return {};
+    }
+
+    // The buffers returned by PyUnicode_AsUTF8 belong to the attribute objects,
+    // so the text is copied into `result` before the references are dropped.
+    std::string result;
+    const char* module_str = PyUnicode_AsUTF8(module_attr);
+    const char* name_str = PyUnicode_AsUTF8(name_attr);
+    if (module_str != nullptr && name_str != nullptr) {
+        result = std::string(module_str) + "." + std::string(name_str);
+    }
+    // On failure `result` stays empty: returning nullptr here would construct a
+    // std::string from a null pointer, which is undefined behaviour.
+
+    Py_DECREF(module_attr);
+    Py_DECREF(name_attr);
+
+    return result;
+}
+
+// Pushes one frame onto `sample` for each entry of the traceback chain that
+// starts at `tb`, following tb_next.
+void
+push_exception_frames(Sample* sample, PyTracebackObject* tb)
+{
+    for (; tb != nullptr; tb = tb->tb_next) {
+        PyFrameObject* frame = tb->tb_frame;
+        if (frame == nullptr) {
+            continue;
+        }
+#if PY_VERSION_HEX >= 0x030b0000
+        // Since 3.11, members of PyFrameObject are removed from the public C API.
+        // PyFrame_GetCode returns a strong reference, released at the end of the
+        // iteration.
+        PyCodeObject* code = PyFrame_GetCode(frame);
+#else
+        PyCodeObject* code = frame->f_code;
+#endif
+        if (code == nullptr) {
+            continue;
+        }
+        PyObject* filename = code->co_filename;
+        PyObject* name = code->co_name;
+        if (filename != nullptr && name != nullptr) {
+            const char* filename_str = PyUnicode_AsUTF8(filename);
+            const char* name_str = PyUnicode_AsUTF8(name);
+            if (filename_str != nullptr && name_str != nullptr) {
+                // NOTE(review): co_firstlineno is the line where the function is
+                // defined, not the line that raised -- confirm this is intended.
+                ddup_push_frame(sample, name_str, filename_str, 0, code->co_firstlineno);
+            }
+        }
+#if PY_VERSION_HEX >= 0x030b0000
+        Py_DECREF(code);
+#endif
+    }
+}
+
+// Emits an extra sample describing the exception `tstate` is currently
+// handling, if there is one. Does nothing otherwise.
+// NOTE(review): this calls into the CPython API; confirm the calling thread
+// is allowed to do so (e.g. holds the GIL) before relying on it.
+void
+StackRenderer::maybe_collect_exception_sample(PyThreadState* tstate)
+{
+    _PyErr_StackItem* exc_info = _PyErr_GetTopmostException(tstate);
+    if (exc_info == nullptr) {
+        return;
+    }
+
+    PyObject* exc_value = exc_info->exc_value;
+    if (exc_value == nullptr || exc_value == Py_None) {
+        return;
+    }
+
+    // Equivalent to the Python code: exc_type = type(exc_value)
+    // Py_TYPE yields a borrowed reference, so it must not be released here.
+    PyTypeObject* exc_type = Py_TYPE(exc_value);
+    if (exc_type == nullptr) {
+        return;
+    }
+
+    // Equivalent to the Python code: exc_traceback = exc_value.__traceback__
+    // PyObject_GetAttrString returns a new reference, released on every path below.
+    PyObject* exc_traceback = PyObject_GetAttrString(exc_value, "__traceback__");
+    if (exc_traceback == nullptr) {
+        PyErr_Clear(); // Clear any error set by PyObject_GetAttrString
+        return;
+    }
+    // __traceback__ may be None; only a real traceback object can be walked as
+    // a PyTracebackObject.
+    if (!PyTraceBack_Check(exc_traceback)) {
+        Py_DECREF(exc_traceback);
+        return;
+    }
+
+    // Format exc_type as exc_type.__module__ + '.' + exc_type.__name__
+    std::string exc_type_name = get_exc_type_name(reinterpret_cast<PyObject*>(exc_type));
+    if (exc_type_name.empty()) {
+        Py_DECREF(exc_traceback);
+        return;
+    }
+
+    // Now we have the exception type name, we can start building the exception sample
+    Sample* exc_sample = ddup_start_sample();
+    if (exc_sample == nullptr) {
+        std::cerr << "Failed to create a sample. Stack v2 sampler will be disabled." << std::endl;
+        Py_DECREF(exc_traceback);
+        return;
+    }
+    ddup_push_monotonic_ns(exc_sample, thread_state.now_time_ns);
+    ddup_push_threadinfo(exc_sample,
+                         static_cast<int64_t>(thread_state.id),
+                         static_cast<int64_t>(thread_state.native_id),
+                         thread_state.name);
+    ddup_push_exceptioninfo(exc_sample, exc_type_name, 1);
+    push_exception_frames(exc_sample, reinterpret_cast<PyTracebackObject*>(exc_traceback));
+    ddup_flush_sample(exc_sample);
+    ddup_drop_sample(exc_sample);
+
+    Py_DECREF(exc_traceback);
+}
+
 void
 StackRenderer::render_message(std::string_view msg)
 {
@@ -52,6 +194,8 @@ StackRenderer::render_thread_begin(PyThreadState* tstate,
     // Finalize the thread information we have
     ddup_push_threadinfo(sample, static_cast<int64_t>(thread_id), static_cast<int64_t>(native_id), name);
     ddup_push_walltime(sample, thread_state.wall_time_ns, 1);
+
+    maybe_collect_exception_sample(tstate);
 }
 
 void
diff --git a/tests/profiling_v2/collector/test_stack.py b/tests/profiling_v2/collector/test_stack.py
index da5a9078642..6f8c6bba843 100644
--- a/tests/profiling_v2/collector/test_stack.py
+++ b/tests/profiling_v2/collector/test_stack.py
@@ -166,3 +166,34 @@ def test_push_non_web_span():
             # trace_endpoint is not set for non-web spans
         ),
     )
+
+
+@pytest.mark.subprocess(env=dict(DD_PROFILING_STACK_V2_ENABLED="true"))
+def test_exception_collection():
+    import os
+    import time
+
+    from ddtrace.internal.datadog.profiling import ddup
+    from ddtrace.profiling.collector import stack
+    from tests.profiling.collector import pprof_utils
+
+    test_name = "test_exception_collection"
+    pprof_prefix = "/tmp/" + test_name
+    output_filename = pprof_prefix + "." + str(os.getpid())
+
+    assert ddup.is_available
+    ddup.config(env="test", service=test_name, version="my_version", output_filename=pprof_prefix)
+    ddup.start()
+
+    with stack.StackCollector(None, ignore_profiler=True):
+        try:
+            raise ValueError("hello")
+        except Exception:
+            # Presumably keeps the exception active long enough to be sampled.
+            time.sleep(1)
+
+    ddup.upload()
+
+    profile = pprof_utils.parse_profile(output_filename)
+    samples = pprof_utils.get_samples_with_label_key(profile, "exception type")
+    assert len(samples) > 0