Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Configure stack size #284

Merged
merged 12 commits into from
Jul 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions include/ddprof_cli.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,12 @@ struct DDProfCLI {

// extended
std::string cpu_affinity;
uint32_t default_stack_sample_size{k_default_perf_stack_sample_size};
bool show_samples{false};
bool fault_info{true};
bool help_extended{false};
int socket{-1};
// valid state to continue ?
bool continue_exec = {false};
bool continue_exec{false};

// args
std::vector<std::string> command_line;
Expand Down
6 changes: 5 additions & 1 deletion include/ddprof_cmdline.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#pragma once

#include "ddprof_defs.hpp"

#include <stddef.h> // size_t
#include <stdint.h> // uint64_t
#include <vector>
Expand All @@ -28,4 +30,6 @@ bool arg_inset(const char *str, char const *const *set, int sz_set);

bool arg_yesno(const char *str, int mode);

bool watchers_from_str(const char *str, std::vector<PerfWatcher> &watchers);
bool watchers_from_str(
const char *str, std::vector<PerfWatcher> &watchers,
uint32_t stack_sample_size = k_default_perf_stack_sample_size);
13 changes: 13 additions & 0 deletions include/ddprof_defs.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,19 @@
// Maximum depth for a single stack
#define DD_MAX_STACK_DEPTH 512

// Sample stack size must be a multiple of 8 and strictly less than 2^16.
// Note that since the maximum perf_event_hdr size is 2^16-1 and there are other
// data/headers in the perf_event struct, the actual maximum stack sample size
// returned as `size_stack` might be smaller than the requested size.
// The type is uint32 to be consistent with the perf_event interface
// Check linux sources for a reference to the sample size check
constexpr uint32_t k_default_perf_stack_sample_size = 32000;

// The size of the ring buffers is adjusted according to the sample size.
// The following is the minimum number of samples that must fit in a ring
// buffer.
constexpr auto k_min_number_samples_per_ring_buffer = 8;

constexpr int k_size_api_key = 32;

// Linux Inode type
Expand Down
40 changes: 23 additions & 17 deletions include/event_config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

#pragma once

#include "ddprof_defs.hpp"

#include <stdint.h>
#include <string>
#include <vector>
Expand Down Expand Up @@ -141,29 +143,33 @@ enum class EventConfField {
* The `perf_event` register number to use for sample normalization. In a
* future patch, the user will be able to use register names.
*/
kStackSampleSize,
/*
* The `perf_event` setting on stack size. Defines the size of samples that
* are copied from the user application. This will define how far we can
* unwind.
*/
};

struct EventConf {
EventConfMode mode;

int64_t id;
EventConfMode mode{};

std::string eventname;
std::string groupname;
std::string label;
int64_t id{};

EventConfValueSource value_source;
uint8_t register_num;
uint8_t raw_size;
uint64_t raw_offset;
double value_scale;
std::string eventname{};
std::string groupname{};
std::string label{};

EventConfCadenceType cad_type;
int64_t cadence;
EventConfValueSource value_source{};
uint8_t register_num{};
uint8_t raw_size{};
uint64_t raw_offset{};
uint32_t stack_sample_size{k_default_perf_stack_sample_size};
double value_scale{};

void clear() { *this = EventConf{}; }
EventConfCadenceType cad_type{};
int64_t cadence{};
};

int EventConf_parse(
const char *msg,
std::vector<EventConf> &event_configs); // Provided by generated code
int EventConf_parse(const char *msg, const EventConf &template_conf,
std::vector<EventConf> &event_configs);
1 change: 1 addition & 0 deletions include/ipc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ struct ReplyMessage {
// cppcheck-suppress unusedStructMember
RingBufferInfo ring_buffer;
int32_t allocation_flags = 0;
uint32_t stack_sample_size = 0;
};

class Client {
Expand Down
33 changes: 33 additions & 0 deletions include/lib/allocation_event.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0. This product includes software
// developed at Datadog (https://www.datadoghq.com/). Copyright 2023-Present
// Datadog, Inc.

#pragma once

#include <linux/perf_event.h>

namespace ddprof {

// AllocationEvent
// This represents a sampled allocation.
// We keep the same layout as a perf event to unify the code paths.
// The field order is therefore significant: do not reorder members.
struct AllocationEvent {
// Leading event header (type declared in <linux/perf_event.h>).
perf_event_header hdr;
struct sample_id sample_id;
uint64_t addr; /* if PERF_SAMPLE_ADDR */
// NOTE(review): presumably the sampling period attached to this sample —
// confirm against the code that fills the event.
uint64_t period;
uint64_t abi; /* if PERF_SAMPLE_REGS_USER */
uint64_t regs[PERF_REGS_COUNT]; /* if PERF_SAMPLE_REGS_USER */
uint64_t size_stack; /* if PERF_SAMPLE_STACK_USER */
// Unbounded trailing array: the stack bytes are stored right after the
// fixed-size fields, so sizeof(AllocationEvent) does not account for them
// (see sizeof_allocation_event, which adds the stack size explicitly).
std::byte data[]; /* requires PERF_SAMPLE_STACK_USER, dyn size
will contain the actual size */
};
// An extra field is written after the end of `data` to communicate the
// dynamic size (dyn_size):
// uint64_t dyn_size_stack;

/// Total number of bytes occupied by an allocation event that carries
/// `stack_sample_user` bytes of stack data.
inline size_t sizeof_allocation_event(uint32_t stack_sample_user) {
  // Fixed-size part of the event (everything that precedes the stack bytes).
  constexpr size_t fixed_part = sizeof(AllocationEvent);
  // Extra dyn_size field stored right after the stack bytes.
  constexpr size_t dyn_size_field = sizeof(uint64_t);
  return fixed_part + stack_sample_user + dyn_size_field;
}
} // namespace ddprof
4 changes: 3 additions & 1 deletion include/lib/allocation_tracker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class AllocationTracker {

static DDRes allocation_tracking_init(uint64_t allocation_profiling_rate,
uint32_t flags,
uint32_t stack_sample_size,
const RingBufferInfo &ring_buffer);
static void allocation_tracking_free();

Expand Down Expand Up @@ -85,7 +86,7 @@ class AllocationTracker {
uint64_t next_sample_interval();

DDRes init(uint64_t mem_profile_interval, bool deterministic_sampling,
const RingBufferInfo &ring_buffer);
uint32_t stack_sample_size, const RingBufferInfo &ring_buffer);
void free();

static AllocationTracker *create_instance();
Expand All @@ -108,6 +109,7 @@ class AllocationTracker {

TrackerState _state;
uint64_t _sampling_interval;
uint32_t _stack_sample_size;
std::mt19937 _gen;
PEvent _pevent;
bool _deterministic_sampling;
Expand Down
6 changes: 0 additions & 6 deletions include/perf.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,6 @@

#define PSAMPLE_DEFAULT_WAKEUP_MS 100 // sample frequency check

// Sample stack size must a multiple of 8 and strictly inferior to 2^16
// Note that since maximum perf_event_hdr size is 2^16-1 and there are other
// data/headers in perf_event struct, actual maximum stack sample size returned
// as `size_stack` might be smaller then the requested size
#define PERF_SAMPLE_STACK_SIZE (4096UL * 8)

struct read_format {
uint64_t value; // The value of the event
uint64_t time_enabled; // if PERF_FORMAT_TOTAL_TIME_ENABLED
Expand Down
5 changes: 3 additions & 2 deletions include/perf_watcher.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@

#pragma once

#include "ddprof_defs.hpp"
#include "event_config.hpp"

#include <string>

#include <linux/perf_event.h>
Expand All @@ -24,6 +24,8 @@ struct PerfWatcherOptions {
uint8_t nb_frames_to_skip; // number of bottom frames to skip in stack trace
// (useful for allocation profiling to remove
// frames belonging to lib_ddprofiling.so)
uint32_t stack_sample_size{
k_default_perf_stack_sample_size}; // size of the user stack to capture
};

struct PerfWatcher {
Expand All @@ -38,7 +40,6 @@ struct PerfWatcher {
uint64_t sample_frequency;
};
int sample_type_id; // index into the sample types defined in this header
uint16_t sample_stack_size; // size of the stack to capture

// perf_event_open configs
struct PerfWatcherOptions options;
Expand Down
6 changes: 6 additions & 0 deletions include/pevent_lib.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ DDRes pevent_open(DDProfContext *ctx, pid_t pid, int num_cpu,
/// Setup mmap buffers according to content of peventhdr
DDRes pevent_mmap(PEventHdr *pevent_hdr, bool use_override);

/// Compute minimum size for a given ring buffer
/// This is adjusted using the number of samples we can fit in a buffer
int pevent_compute_min_mmap_order(int min_buffer_size_order,
uint32_t stack_sample_size,
unsigned min_number_samples);

/// Setup watchers = setup mmap + setup perfevent
DDRes pevent_setup(DDProfContext &ctx, pid_t pid, int num_cpu,
PEventHdr *pevent_hdr);
Expand Down
1 change: 1 addition & 0 deletions include/presets.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ struct Preset {
};

DDRes add_preset(std::string_view preset, bool pid_or_global_mode,
uint32_t default_stack_sample_size,
std::vector<PerfWatcher> &watchers);

} // namespace ddprof
33 changes: 32 additions & 1 deletion src/ddprof_cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,23 @@ void write_config_file(const CLI::App &app, const std::string &file_path) {
out_file << app.config_to_str();
out_file.close();
}

using Validator = CLI::Validator;
// CLI validator for the stack sample size option.
//
// Accepts a string only if it is entirely a base-10 integer that is
// non-negative, strictly less than USHRT_MAX and a multiple of 8.
// Returns an empty string when the value is accepted, or a human readable
// error message otherwise (the convention used by the validator callback).
struct SampleStackSizeValidator : public Validator {
  SampleStackSizeValidator() {
    name_ = "SAMPLE_STACK_SIZE";
    func_ = [](const std::string &str) -> std::string {
      bool is_valid = false;
      try {
        std::size_t nb_parsed = 0;
        const int value = std::stoi(str, &nb_parsed);
        // Reject trailing garbage ("32000abc"), negative values (which would
        // otherwise pass the upper bound and alignment checks), values that
        // do not fit the perf_event limit and unaligned sizes.
        is_valid = nb_parsed == str.size() && value >= 0 &&
            value < USHRT_MAX && value % 8 == 0;
      } catch (const std::exception &) {
        // std::stoi throws std::invalid_argument (not a number) or
        // std::out_of_range (does not fit an int): report both as a
        // validation failure instead of letting the exception escape.
        is_valid = false;
      }
      if (is_valid) {
        return {};
      }
      return "Invalid stack_sample_size value. Value should be a "
             "non-negative integer less than " +
          std::to_string(USHRT_MAX) + " and a multiple of 8.";
    };
  }
};
} // namespace

int DDProfCLI::parse(int argc, const char *argv[]) {
Expand Down Expand Up @@ -260,6 +277,15 @@ int DDProfCLI::parse(int argc, const char *argv[]) {
"Profiler's IPC socket, as a file descriptor")
->envname("DD_PROFILING_NATIVE_SOCKET")
->group(""));
extended_options.push_back(
app.add_option("--stack_sample_size", default_stack_sample_size,
"Sample size for the user's stack."
"This setting can help with truncated stack traces."
"Maximum value is 65528 (<USHORT_MAX and 8Bytes aligned).")
->default_val(k_default_perf_stack_sample_size)
->envname("DD_PROFILING_SAMPLE_STACK_USER")
->group("")
->check(SampleStackSizeValidator()));

// Parse
CLI11_PARSE(app, argc, argv);
Expand Down Expand Up @@ -304,7 +330,7 @@ int DDProfCLI::parse(int argc, const char *argv[]) {
DDRes DDProfCLI::add_watchers_from_events(
std::vector<PerfWatcher> &watchers) const {
for (const auto &el : events) {
if (!watchers_from_str(el.c_str(), watchers)) {
if (!watchers_from_str(el.c_str(), watchers, default_stack_sample_size)) {
DDRES_RETURN_ERROR_LOG(DD_WHAT_INPUT_PROCESS,
"Invalid event/tracepoint (%s)", el.c_str());
}
Expand Down Expand Up @@ -387,6 +413,11 @@ void DDProfCLI::print() const {
PRINT_NFO(" - show_samples: %s", show_samples ? "true" : "false");
}
PRINT_NFO(" - fault_info: %s", fault_info ? "true" : "false");

if (default_stack_sample_size != k_default_perf_stack_sample_size) {
PRINT_NFO("Extended:");
PRINT_NFO(" - stack_sample_size: %u", default_stack_sample_size);
}
}

CommandLineWrapper DDProfCLI::get_user_command_line() const {
Expand Down
10 changes: 6 additions & 4 deletions src/ddprof_cmdline.cc
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,9 @@ static bool watcher_from_config(EventConf *conf, PerfWatcher *watcher) {
watcher->tracepoint_event = conf->eventname;
watcher->tracepoint_group = conf->groupname;
watcher->tracepoint_label = conf->label;

watcher->options.stack_sample_size = conf->stack_sample_size;
// Allocation watcher, has an extra field to ensure we capture address

if (watcher->config == kDDPROF_COUNT_ALLOCATIONS) {
watcher->sample_type |= PERF_SAMPLE_ADDR;
}
Expand All @@ -135,9 +136,11 @@ static bool watcher_from_config(EventConf *conf, PerfWatcher *watcher) {
}

// If this returns false, then the passed watcher should be regarded as invalid
bool watchers_from_str(const char *str, std::vector<PerfWatcher> &watchers) {
bool watchers_from_str(const char *str, std::vector<PerfWatcher> &watchers,
uint32_t stack_sample_size) {
std::vector<EventConf> configs;
if (EventConf_parse(str, configs) != 0) {
EventConf template_conf{.stack_sample_size = stack_sample_size};
if (EventConf_parse(str, template_conf, configs) != 0) {
return false;
}

Expand All @@ -148,6 +151,5 @@ bool watchers_from_str(const char *str, std::vector<PerfWatcher> &watchers) {
}
watchers.push_back(std::move(watcher));
}

return true;
}
3 changes: 2 additions & 1 deletion src/ddprof_context_lib.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ DDRes context_add_watchers(const DDProfCLI &ddprof_cli, DDProfContext &ctx) {
if (!preset.empty()) {
bool pid_or_global_mode =
(ddprof_cli.global || ddprof_cli.pid) && ctx.params.sockfd == -1;
DDRES_CHECK_FWD(add_preset(preset, pid_or_global_mode, watchers));
DDRES_CHECK_FWD(add_preset(preset, pid_or_global_mode,
ddprof_cli.default_stack_sample_size, watchers));
}

// Add a dummy watcher if needed
Expand Down
3 changes: 2 additions & 1 deletion src/ddprof_worker.cc
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,8 @@ static DDRes ddprof_unwind_sample(DDProfContext &ctx, perf_event_sample *sample,
* That's why we consider the stack as truncated in input only if it is also
* detected as incomplete during unwinding.
*/
if (sample->size_stack == ctx.watchers[watcher_pos].sample_stack_size &&
if (sample->size_stack ==
ctx.watchers[watcher_pos].options.stack_sample_size &&
us->output.is_incomplete) {
ddprof_stats_add(STATS_UNWIND_TRUNCATED_INPUT, 1, nullptr);
}
Expand Down
25 changes: 13 additions & 12 deletions src/event_parser/event_parser.l
Original file line number Diff line number Diff line change
Expand Up @@ -55,18 +55,19 @@
}
}

s|value_scale|scale DISPATCH(ValueScale)
f|frequency|freq DISPATCH(Frequency)
e|event|eventname|ev DISPATCH(Event)
g|group|groupname|gr DISPATCH(Group)
i|id DISPATCH(Id)
l|label DISPATCH(Label)
m|mode DISPATCH(Mode)
n|arg_num|argno DISPATCH(Parameter)
o|raw_offset|rawoff DISPATCH(RawOffset)
p|period|per DISPATCH(Period)
r|register|regno DISPATCH(Register)
z|raw_size|rawsz DISPATCH(RawSize)
s|value_scale|scale DISPATCH(ValueScale)
f|frequency|freq DISPATCH(Frequency)
e|event|eventname|ev DISPATCH(Event)
g|group|groupname|gr DISPATCH(Group)
i|id DISPATCH(Id)
l|label DISPATCH(Label)
m|mode DISPATCH(Mode)
n|arg_num|argno DISPATCH(Parameter)
o|raw_offset|rawoff DISPATCH(RawOffset)
p|period|per DISPATCH(Period)
st|stack_sample_size|stcksz DISPATCH(StackSampleSize)
r|register|regno DISPATCH(Register)
z|raw_size|rawsz DISPATCH(RawSize)

= {
BEGIN VALUE;
Expand Down
Loading