pkg/sensors: reduce stack trace map memory footprint
We stumbled upon a stack trace map with a max_entries of 32768. Each
entry holds PERF_MAX_STACK_DEPTH (fixed at 127 for now) 64-bit
pointers, i.e. 127*64/8 = 1016 bytes, plus its key_size of 32 bits (4
bytes), so 1020 bytes per entry. That gives 1020 * 32768 = 33,423,360
bytes. From bpftool, this map has a total bytes_memlock of 34,079,040
bytes. So each stack trace map we loaded cost about 34MB of kernel
memory, and it happened to be loaded many times, since one copy came
with every tracing policy we loaded.
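
As a side note, here is a quick sketch of that sizing arithmetic in Go
(the constants mirror the map definition; the gap to bytes_memlock is
kernel accounting overhead on top of the raw key/value bytes):

    package main

    import "fmt"

    func main() {
        const (
            perfMaxStackDepth = 127   // PERF_MAX_STACK_DEPTH, fixed for now
            maxEntries        = 32768 // current max_entries of stack_trace_map
            keySize           = 4     // __u32 key
            ptrSize           = 8     // __u64 frame addresses
        )
        valueSize := perfMaxStackDepth * ptrSize // 1016 bytes
        perEntry := valueSize + keySize          // 1020 bytes
        fmt.Println(perEntry * maxEntries)       // 33423360, i.e. ~32MB
        // bpftool reports bytes_memlock of 34,079,040 for this map; the
        // difference is per-map/per-entry kernel bookkeeping overhead.
    }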

Since the map is used by the generic program, the loader will allocate
the memory needed for the map even if we don't create a reference to it
from the agent side: it simply ends up as an anonymous map. So we now
allocate a small map with a max_entries of 1 by default and resize it
when the tracing policy actually specifies a matchAction with
kernelStackTrace or userStackTrace set to true. This should drastically
reduce the memory footprint of this feature when it's unused.
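
For illustration, a minimal sketch of that resize-before-load mechanism
using cilium/ebpf directly — the helper name and object path are
hypothetical, not Tetragon's actual loader code:

    package main

    import "github.com/cilium/ebpf"

    // loadGenericKprobe is a hypothetical helper: bumping MaxEntries on
    // the MapSpec before load is what commits (or avoids) the kernel
    // memory for the map.
    func loadGenericKprobe(objPath string, wantStackTraces bool) (*ebpf.Collection, error) {
        spec, err := ebpf.LoadCollectionSpec(objPath)
        if err != nil {
            return nil, err
        }
        if m, ok := spec.Maps["stack_trace_map"]; ok && wantStackTraces {
            m.MaxEntries = 32768 // grow from the default of 1 only when needed
        }
        return ebpf.NewCollection(spec)
    }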

Signed-off-by: Mahe Tardy <mahe.tardy@gmail.com>
mtardy committed Jun 13, 2024
1 parent 23b041e commit 22510d9
Showing 2 changed files with 32 additions and 4 deletions.
2 changes: 1 addition & 1 deletion bpf/process/types/basic.h
@@ -2103,7 +2103,7 @@ update_pid_tid_from_sock(struct msg_generic_kprobe *e, __u64 sockaddr)
#define PERF_MAX_STACK_DEPTH 127
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 32768);
__uint(max_entries, 1); // the agent resizes this during kprobe load if the feature is needed
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u64) * PERF_MAX_STACK_DEPTH);
} stack_trace_map SEC(".maps");
34 changes: 31 additions & 3 deletions pkg/sensors/tracing/generickprobe.go
@@ -57,6 +57,8 @@ const (
CharBufErrorPageFault = -2
CharBufErrorTooLarge = -3
CharBufSavedForRetprobe = -4

stackTraceMapMaxEntries = 32768 // this value could be fine-tuned
)

func kprobeCharBufErrorToString(e int32) string {
@@ -89,6 +91,8 @@ type pendingEventKey struct {
}

type genericKprobeData struct {
// stackTraceMap reference is needed when retrieving stack traces from
// userspace when receiving events containing stacktrace IDs
stackTraceMap *program.Map
}
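
That comment is why the map reference is kept around: when an event
carries a stacktrace ID, the agent resolves it against this map. A
minimal sketch of such a lookup with cilium/ebpf follows — the package
and helper name are hypothetical; the value layout matches the
stack_trace_map definition in basic.h above:

    package stacktrace

    import "github.com/cilium/ebpf"

    // readStackTrace is a hypothetical helper resolving a stacktrace ID,
    // as carried by an event, into its non-zero frame addresses.
    func readStackTrace(m *ebpf.Map, stackID uint32) ([]uint64, error) {
        // value_size is sizeof(__u64) * PERF_MAX_STACK_DEPTH (127);
        // unused trailing frames are zero.
        var frames [127]uint64
        if err := m.Lookup(stackID, &frames); err != nil {
            return nil, err
        }
        var out []uint64
        for _, addr := range frames {
            if addr == 0 {
                break
            }
            out = append(out, addr)
        }
        return out, nil
    }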

@@ -129,6 +133,11 @@ type genericKprobe struct {
// one global instance when we use kprobe multi
data *genericKprobeData

// Does this kprobe use stack traces? Note that, as specified in the
// data field comment above, the map is global for kprobe multi and
// unique to each kprobe when using single kprobes.
hasStackTrace bool

customHandler eventhandler.Handler
}

@@ -262,6 +271,7 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
var maps []*program.Map

data := &genericKprobeData{}
oneKprobeHasStackTrace := false

for _, id := range multiIDs {
gk, err := genericKprobeTableGet(id)
@@ -271,6 +281,7 @@
if gk.loadArgs.retprobe {
multiRetIDs = append(multiRetIDs, id)
}
oneKprobeHasStackTrace = oneKprobeHasStackTrace || gk.hasStackTrace
gk.data = data
}

@@ -322,7 +333,11 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
maps = append(maps, matchBinariesPaths)

stackTraceMap := program.MapBuilderPin("stack_trace_map", sensors.PathJoin(pinPath, "stack_trace_map"), load)
if oneKprobeHasStackTrace {
stackTraceMap.SetMaxEntries(stackTraceMapMaxEntries)
}
maps = append(maps, stackTraceMap)
data.stackTraceMap = stackTraceMap

if kernels.EnableLargeProgs() {
socktrack := program.MapBuilderPin("socktrack_map", sensors.PathJoin(sensorPath, "socktrack_map"), load)
@@ -335,8 +350,6 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
filterMap.SetMaxEntries(len(multiIDs))
configMap.SetMaxEntries(len(multiIDs))

data.stackTraceMap = stackTraceMap

if len(multiRetIDs) != 0 {
loadret := program.Builder(
path.Join(option.Config.HubbleLib, loadProgRetName),
@@ -752,6 +765,13 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
config.Syscall = 0
}

hasStackTrace := false
for _, selector := range f.Selectors {
for _, matchAction := range selector.MatchActions {
hasStackTrace = hasStackTrace || matchAction.KernelStackTrace || matchAction.UserStackTrace
}
}

// create a new entry on the table, and pass its id to BPF-side
// so that we can do the matching at event-generation time
kprobeEntry := genericKprobe{
@@ -770,6 +790,7 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
customHandler: in.customHandler,
message: msgField,
tags: tagsField,
hasStackTrace: hasStackTrace,
}

// Parse Filters into kernel filter logic
@@ -862,9 +883,16 @@ func createKprobeSensorFromEntry(kprobeEntry *genericKprobe, sensorPath string,
}
maps = append(maps, matchBinariesPaths)

// Load the stack trace map in any case so that it does not end up as an
// anonymous map (it is always referenced by the BPF program) and is
// clearly linked to tetragon.
stackTraceMap := program.MapBuilderPin("stack_trace_map", sensors.PathJoin(pinPath, "stack_trace_map"), load)
if kprobeEntry.hasStackTrace {
// To reduce the memory footprint, however, the stack trace map is
// created with a max_entries of 1; we need to expand it at load time.
stackTraceMap.SetMaxEntries(stackTraceMapMaxEntries)
}
maps = append(maps, stackTraceMap)

kprobeEntry.data.stackTraceMap = stackTraceMap

if kernels.EnableLargeProgs() {
