From 40e8072ca6e899c59645c536b4c5efccfdbbf366 Mon Sep 17 00:00:00 2001 From: Gregory LEOCADIE Date: Fri, 29 Nov 2024 17:34:49 +0100 Subject: [PATCH] [Crashtracker] Use blazesym API to retrieve buildid (#6347) --- build/cmake/FindLibdatadog.cmake | 10 +- .../CrashReportingLinux.cpp | 127 ++++++++++-------- .../CrashReportingLinux.h | 6 +- .../CrashReporting.cpp | 21 +++ .../Datadog.Profiler.Native/CrashReporting.h | 55 ++++++++ .../CreatedumpTests.cs | 57 +++++--- ...Trace.Tools.dd_dotnet.ArtifactTests.csproj | 1 + 7 files changed, 195 insertions(+), 82 deletions(-) diff --git a/build/cmake/FindLibdatadog.cmake b/build/cmake/FindLibdatadog.cmake index 46a839460547..0e57e1b68df3 100644 --- a/build/cmake/FindLibdatadog.cmake +++ b/build/cmake/FindLibdatadog.cmake @@ -4,22 +4,22 @@ endif() include(FetchContent) -set(LIBDATADOG_VERSION "v13.1.0" CACHE STRING "libdatadog version") +set(LIBDATADOG_VERSION "v14.3.1" CACHE STRING "libdatadog version") if (CMAKE_SYSTEM_PROCESSOR STREQUAL aarch64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL arm64) if (DEFINED ENV{IsAlpine} AND "$ENV{IsAlpine}" MATCHES "true") - set(SHA256_LIBDATADOG "9cddbc9ece4c2fe9a1f0ab5a7cfed218d617c5154f318e0bce9a6102b265c989" CACHE STRING "libdatadog sha256") + set(SHA256_LIBDATADOG "57f83aff275628bb1af89c22bb4bd696726daf2a9e09b6cd0d966b29e65a7ad6" CACHE STRING "libdatadog sha256") set(FILE_TO_DOWNLOAD libdatadog-aarch64-alpine-linux-musl.tar.gz) else() - set(SHA256_LIBDATADOG "db17a5873d82ef772f969582949b272dcd04044a0cd08b196d3820172a19814d" CACHE STRING "libdatadog sha256") + set(SHA256_LIBDATADOG "36db8d50ccabb71571158ea13835c0f1d05d30b32135385f97c16343cfb6ddd4" CACHE STRING "libdatadog sha256") set(FILE_TO_DOWNLOAD libdatadog-aarch64-unknown-linux-gnu.tar.gz) endif() else() if (DEFINED ENV{IsAlpine} AND "$ENV{IsAlpine}" MATCHES "true") - set(SHA256_LIBDATADOG "46d0e6445fa1b0fbe8d079e6fa997fa10a4fef4084fe10f4b5886c92effc7be8" CACHE STRING "libdatadog sha256") + set(SHA256_LIBDATADOG "2f61fd21cf2f8147743e414b4a8c77250a17be3aecc42a69ffe54f0a603d5c92" CACHE STRING "libdatadog sha256") set(FILE_TO_DOWNLOAD libdatadog-${CMAKE_SYSTEM_PROCESSOR}-alpine-linux-musl.tar.gz) else() - set(SHA256_LIBDATADOG "adaf79470fd0b06ce6d63ae8f231e555fa12b70d5bf82565a96a25f59ea8071d" CACHE STRING "libdatadog sha256") + set(SHA256_LIBDATADOG "f01f05600591063eba4faf388f54c155ab4e6302e5776c7855e3734955f7daf7" CACHE STRING "libdatadog sha256") set(FILE_TO_DOWNLOAD libdatadog-${CMAKE_SYSTEM_PROCESSOR}-unknown-linux-gnu.tar.gz) endif() endif() diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.cpp index eed76805e233..74972224f3ab 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.cpp @@ -5,26 +5,26 @@ #include #include -#include #include -#include #include +#include +#include -#include -#include -#include -#include +#include "FfiHelper.h" #include -#include +#include +#include #include +#include #include -#include "FfiHelper.h" +#include +#include extern "C" { #include "datadog/common.h" -#include "datadog/profiling.h" #include "datadog/crashtracker.h" +#include "datadog/profiling.h" } #include @@ -35,8 +35,8 @@ CrashReporting* CrashReporting::Create(int32_t pid) return (CrashReporting*)crashReporting; } -CrashReportingLinux::CrashReportingLinux(int32_t pid) - : CrashReporting(pid) +CrashReportingLinux::CrashReportingLinux(int32_t pid) : + CrashReporting(pid) { } @@ -67,17 +67,17 @@ int32_t CrashReportingLinux::Initialize() return result; } -std::pair CrashReportingLinux::FindModule(uintptr_t ip) +const ModuleInfo* CrashReportingLinux::FindModule(uintptr_t ip) { for (auto const& module : _modules) { if (ip >= module.startAddress && ip < module.endAddress) { - return std::make_pair(module.path, module.baseAddress); + return &module; } } - return std::make_pair("", 0); + return nullptr; } std::vector CrashReportingLinux::GetModules() @@ -97,10 +97,9 @@ std::vector CrashReportingLinux::GetModules() std::getline(iss, path); // Skip whitespace at the start // Trim path - path.erase(path.begin(), std::find_if(path.begin(), path.end(), [](int ch) - { - return !std::isspace(ch); - })); + path.erase(path.begin(), std::find_if(path.begin(), path.end(), [](int ch) { + return !std::isspace(ch); + })); if (path.empty()) { @@ -133,7 +132,8 @@ std::vector CrashReportingLinux::GetModules() moduleBaseAddresses[path] = baseAddress; } - modules.push_back(ModuleInfo{ start, end, baseAddress, std::move(path) }); + auto buildId = ElfBuildId(path.data()); + modules.push_back(ModuleInfo{ start, end, baseAddress, std::move(path), std::move(buildId) }); } return modules; @@ -196,67 +196,76 @@ std::vector CrashReportingLinux::GetThreadFrames(int32_t tid, Resolv stackFrame.sp = sp; stackFrame.isSuspicious = false; - ResolveMethodData methodData; - auto [moduleName, moduleAddress] = FindModule(ip); - stackFrame.moduleAddress = moduleAddress; - - bool hasName = false; - - unw_proc_info_t procInfo; - result = unw_get_proc_info(&cursor, &procInfo); - - if (result == 0) + auto* module = FindModule(ip); + if (module != nullptr) { - stackFrame.symbolAddress = procInfo.start_ip; + stackFrame.moduleAddress = module->baseAddress; - unw_word_t offset; - result = unw_get_proc_name(&cursor, methodData.symbolName, sizeof(methodData.symbolName), &offset); + bool hasName = false; + + unw_proc_info_t procInfo; + result = unw_get_proc_info(&cursor, &procInfo); if (result == 0) { - stackFrame.method = std::string(methodData.symbolName); - hasName = true; + stackFrame.symbolAddress = procInfo.start_ip; - auto demangleResult = ddog_crasht_demangle(libdatadog::to_char_slice(stackFrame.method), DDOG_CRASHT_DEMANGLE_OPTIONS_COMPLETE); + ResolveMethodData methodData; + unw_word_t offset; + result = unw_get_proc_name(&cursor, methodData.symbolName, sizeof(methodData.symbolName), &offset); - if (demangleResult.tag == DDOG_CRASHT_STRING_WRAPPER_RESULT_OK) + if (result == 0) { - // TODO: There is currently no safe way to free the StringWrapper - auto stringWrapper = demangleResult.ok; + stackFrame.method = std::string(methodData.symbolName); + hasName = true; + + auto demangleResult = ddog_crasht_demangle(libdatadog::to_char_slice(stackFrame.method), DDOG_CRASHT_DEMANGLE_OPTIONS_COMPLETE); - if (stringWrapper.message.len > 0) + if (demangleResult.tag == DDOG_CRASHT_STRING_WRAPPER_RESULT_OK) { - stackFrame.method = std::string((char*)stringWrapper.message.ptr, stringWrapper.message.len); + // TODO: There is currently no safe way to free the StringWrapper + auto stringWrapper = demangleResult.ok; + + if (stringWrapper.message.len > 0) + { + stackFrame.method = std::string((char*)stringWrapper.message.ptr, stringWrapper.message.len); + } } } } - } - if (!hasName) - { - std::ostringstream unknownModule; - unknownModule << moduleName << "!+" << std::hex << (ip - moduleAddress); - stackFrame.method = unknownModule.str(); - } + if (!hasName) + { + std::ostringstream unknownModule; + unknownModule << module->path << "!+" << std::hex << (ip - module->baseAddress); + stackFrame.method = unknownModule.str(); + } - stackFrame.isSuspicious = false; + stackFrame.isSuspicious = false; - fs::path modulePath(moduleName); + stackFrame.buildId = module->build_id; - if (modulePath.has_filename()) - { - const auto moduleFilename = modulePath.stem().string(); + fs::path modulePath(module->path); - if ((moduleFilename.rfind("Datadog", 0) == 0 && stackFrame.method != "dd_pthread_entry") - || moduleFilename == "libdatadog" - || moduleFilename == "datadog" - || moduleFilename == "libddwaf" - || moduleFilename == "ddwaf" ) + if (modulePath.has_filename()) { - stackFrame.isSuspicious = true; + const auto moduleFilename = modulePath.stem().string(); + + if ((moduleFilename.rfind("Datadog", 0) == 0 && stackFrame.method != "dd_pthread_entry") + || moduleFilename == "libdatadog" + || moduleFilename == "datadog" + || moduleFilename == "libddwaf" + || moduleFilename == "ddwaf" ) + { + stackFrame.isSuspicious = true; + } } } + else + { + stackFrame.method = ""; + } frames.push_back(std::move(stackFrame)); @@ -325,5 +334,5 @@ std::string CrashReportingLinux::GetThreadName(int32_t tid) std::string threadName; std::getline(commFile, threadName); commFile.close(); - return threadName; + return threadName; } \ No newline at end of file diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.h index c8266de6a426..2ed89584e7d9 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native.Linux/CrashReportingLinux.h @@ -4,6 +4,8 @@ #pragma once #include "CrashReporting.h" +#include +#include #include #include @@ -14,6 +16,8 @@ struct ModuleInfo uintptr_t endAddress; uintptr_t baseAddress; std::string path; + // defined in CrashReporting.h + ElfBuildId build_id; }; class CrashReportingLinux : public CrashReporting @@ -28,7 +32,7 @@ class CrashReportingLinux : public CrashReporting private: std::vector> GetThreads() override; std::vector GetThreadFrames(int32_t tid, ResolveManagedCallstack resolveManagedCallstack, void* context) override; - std::pair FindModule(uintptr_t ip); + const ModuleInfo* FindModule(uintptr_t ip); std::vector GetModules(); std::string GetSignalInfo(int32_t signal) override; std::string GetThreadName(int32_t tid); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.cpp b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.cpp index 9174c7e9fc76..742b91ef84c4 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.cpp +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.cpp @@ -69,6 +69,17 @@ int32_t CrashReporting::Initialize() return 1; } +#ifdef LINUX + // temporary: will remove it when windows will target libdatadog >= 14.3.0 + auto otherResult = ddog_crasht_CrashInfo_set_procinfo(&_crashInfo, { _pid }); + + if (otherResult.tag == DDOG_CRASHT_RESULT_ERR) + { + SetLastError(otherResult.err); + return 1; + } +#endif + return AddTag("severity", "crash"); } @@ -222,12 +233,22 @@ int32_t CrashReporting::ResolveStacks(int32_t crashingThreadId, ResolveManagedCa .symbol_address = symbolAddress, }; +#ifdef _WINDOWS if (frame.hasPdbInfo) { stackFrames[i].normalized_ip.typ = DDOG_CRASHT_NORMALIZED_ADDRESS_TYPES_PDB; stackFrames[i].normalized_ip.age = frame.pdbAge; stackFrames[i].normalized_ip.build_id = { (uint8_t*)&frame.pdbSig, 16 }; } +#else + const auto buildId = frame.buildId.AsSpan(); + if (buildId.size() != 0) + { + stackFrames[i].normalized_ip.typ = DDOG_CRASHT_NORMALIZED_ADDRESS_TYPES_ELF; + stackFrames[i].normalized_ip.build_id = {buildId.data(), buildId.size()}; + stackFrames[i].normalized_ip.file_offset = ip - moduleAddress; + } +#endif } auto threadIdStr = std::to_string(threadId); diff --git a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.h b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.h index 3126cdf03c59..8f8c626a1a6a 100644 --- a/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.h +++ b/profiler/src/ProfilerEngine/Datadog.Profiler.Native/CrashReporting.h @@ -8,12 +8,18 @@ #include #include #include +#include #include "cor.h" #include "corprof.h" +#include "shared/src/native-src/dd_span.hpp" + extern "C" { +#ifdef LINUX +#include "datadog/blazesym.h" +#endif #include "datadog/common.h" #include "datadog/profiling.h" } @@ -30,6 +36,51 @@ struct ResolveMethodData char symbolName[1024]; }; +#ifdef LINUX +class ElfBuildId +{ +private: + struct ElfBuildIdImpl { + ElfBuildIdImpl() : ElfBuildIdImpl(nullptr) {} + ElfBuildIdImpl(const char* path) : _ptr{nullptr}, _size{0} { + if (path != nullptr) + { + _ptr = blaze_read_elf_build_id(path, &_size); + } + }; + ~ElfBuildIdImpl() + { + auto* ptr = std::exchange(_ptr, nullptr); + if (ptr != nullptr && _size != 0) + { + _size = 0; + ::free(ptr); + } + } + + ElfBuildIdImpl(ElfBuildIdImpl const&) = delete; + ElfBuildIdImpl(ElfBuildIdImpl&&) = delete; + ElfBuildIdImpl& operator=(ElfBuildIdImpl const&) = delete; + ElfBuildIdImpl& operator=(ElfBuildIdImpl&&) = delete; + + std::uint8_t* _ptr; + std::size_t _size; + }; +public: + ElfBuildId() : ElfBuildId(nullptr) {} + ElfBuildId(const char* path) + : _impl{std::make_shared(path)} {} + + shared::span AsSpan() const + { + return shared::span(_impl->_ptr, _impl->_size); + } + +private: + std::shared_ptr _impl; +}; +#endif + struct StackFrame { uint64_t ip; @@ -38,9 +89,13 @@ struct StackFrame uint64_t symbolAddress; uint64_t moduleAddress; bool isSuspicious; +#ifdef _WINDOWS bool hasPdbInfo; DWORD pdbAge; GUID pdbSig; +#else + ElfBuildId buildId; +#endif }; struct Tag diff --git a/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/CreatedumpTests.cs b/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/CreatedumpTests.cs index 8979f2ce96f6..697cf9a7dc7c 100644 --- a/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/CreatedumpTests.cs +++ b/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/CreatedumpTests.cs @@ -14,6 +14,9 @@ using Datadog.Trace.Telemetry; using Datadog.Trace.Telemetry.DTOs; using Datadog.Trace.TestHelpers; +using Datadog.Trace.Util; +using ELFSharp.ELF; +using ELFSharp.ELF.Sections; using FluentAssertions; using FluentAssertions.Execution; using Newtonsoft.Json.Linq; @@ -549,28 +552,27 @@ void ValidateStacktrace(JToken callstack) frame.Should().NotBeNull($"couldn't find expected frame {expectedFrame}"); } - if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + var validatedModules = new HashSet(); + + foreach (var frame in frames) { - var validatedModules = new HashSet(); + var moduleName = frame["names"][0]["name"].Value().Split('!').First(); - // Validate PDBs - foreach (var frame in frames) + if (moduleName.Length > 0 && !moduleName.StartsWith("<") && Path.IsPathRooted(moduleName)) { - // Open the PE file - var moduleName = frame["names"][0]["name"].Value().Split('!').First(); - - if (moduleName.Length > 0 && !moduleName.StartsWith("<") && Path.IsPathRooted(moduleName)) + if (!validatedModules.Add(moduleName)) { - if (!validatedModules.Add(moduleName)) - { - continue; - } + continue; + } + if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows)) + { + // Validate PDBs var pdbNode = frame["normalized_ip"]["meta"]["Pdb"]; - var hash = ((JArray)pdbNode["guid"]).Select(g => g.Value()).ToArray(); var age = pdbNode["age"].Value(); + // Open the PE file using var file = File.OpenRead(moduleName); using var peReader = new PEReader(file); @@ -581,17 +583,38 @@ void ValidateStacktrace(JToken callstack) age.Should().Be(unchecked((uint)pdbInfo.Age)); hash.Should().Equal(pdbInfo.Guid.ToByteArray()); } + else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux)) + { + // Validate sofile + if (frame["normalized_ip"] == null) + { + // On linux we can face cases where the build_id is not available: + // - specifically on alpine, /lib/ld-musl-XX do not have a build_id. + // - We are looking at a frame for which the library was unloaded /memfd:doublemapper (deleted) + continue; + } + + var elfNode = frame["normalized_ip"]["meta"]["Elf"]; + var buildId = ((JArray)elfNode["build_id"]).Select(g => g.Value()).ToArray(); + + using var elf = ELFReader.Load(moduleName); + var buildIdNote = elf.GetSection(".note.gnu.build-id") as INoteSection; + buildId.Should().Equal(buildIdNote.Description); + } } + } - validatedModules.Should().NotBeEmpty(); + validatedModules.Should().NotBeEmpty(); #if NETFRAMEWORK - var clrModuleName = "clr.dll"; + var clrModuleName = "clr.dll"; #else - var clrModuleName = "coreclr.dll"; + var clrModuleName = RuntimeInformation.IsOSPlatform(OSPlatform.Linux) ? "libcoreclr.so" : "coreclr.dll"; #endif - validatedModules.Should().ContainMatch($@"*\{clrModuleName}"); + if (!Utils.IsAlpine()) + { + validatedModules.Should().ContainMatch($@"*{Path.DirectorySeparatorChar}{clrModuleName}"); } } } diff --git a/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/Datadog.Trace.Tools.dd_dotnet.ArtifactTests.csproj b/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/Datadog.Trace.Tools.dd_dotnet.ArtifactTests.csproj index 9253b25c11f7..3f17fb174ecb 100644 --- a/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/Datadog.Trace.Tools.dd_dotnet.ArtifactTests.csproj +++ b/tracer/test/Datadog.Trace.Tools.dd_dotnet.ArtifactTests/Datadog.Trace.Tools.dd_dotnet.ArtifactTests.csproj @@ -30,6 +30,7 @@ +