From 1db967d6e20fa06429e523e19c66dca50ea4216b Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Thu, 13 Jun 2024 16:53:12 +0200 Subject: [PATCH 1/4] bpf: remove rate limit maps and structs for kernel <5.3 Since the rate limit feature is only available for LARGE_BPF_PROG, let's remove the unnecessary map and the struct from the small BPF progs. Signed-off-by: Mahe Tardy --- bpf/process/types/basic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bpf/process/types/basic.h b/bpf/process/types/basic.h index 3a45b4b8a9e..084d9797ac4 100644 --- a/bpf/process/types/basic.h +++ b/bpf/process/types/basic.h @@ -1891,6 +1891,7 @@ FUNC_INLINE void do_action_signal(int signal) */ #define KEY_BYTES_PER_ARG 40 +#ifdef __LARGE_BPF_PROG /* Rate limit scope. */ #define ACTION_RATE_LIMIT_SCOPE_THREAD 0 #define ACTION_RATE_LIMIT_SCOPE_PROCESS 1 @@ -1932,7 +1933,6 @@ struct { __type(value, __u8[sizeof(struct ratelimit_key) + 128]); } ratelimit_ro_heap SEC(".maps"); -#ifdef __LARGE_BPF_PROG FUNC_INLINE bool rate_limit(__u64 ratelimit_interval, __u64 ratelimit_scope, struct msg_generic_kprobe *e) { From 9f67f41abb417efdcaafa1430658127f358d75f3 Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Thu, 13 Jun 2024 16:59:17 +0200 Subject: [PATCH 2/4] pkg/sensors: pin the ratelimit_map to the fs Signed-off-by: Mahe Tardy --- pkg/sensors/tracing/generickprobe.go | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pkg/sensors/tracing/generickprobe.go b/pkg/sensors/tracing/generickprobe.go index 41d1f6ea05d..785d280b4df 100644 --- a/pkg/sensors/tracing/generickprobe.go +++ b/pkg/sensors/tracing/generickprobe.go @@ -344,6 +344,11 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E maps = append(maps, socktrack) } + if kernels.EnableLargeProgs() { + ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load) + maps = append(maps, ratelimitMap) + } + enforcerDataMap := enforcerMap(policyName, 
load) maps = append(maps, enforcerDataMap) @@ -900,6 +905,11 @@ func createKprobeSensorFromEntry(kprobeEntry *genericKprobe, sensorPath string, maps = append(maps, socktrack) } + if kernels.EnableLargeProgs() { + ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load) + maps = append(maps, ratelimitMap) + } + enforcerDataMap := enforcerMap(kprobeEntry.policyName, load) maps = append(maps, enforcerDataMap) From c624efd166e27fc9bc24464fa0f078b63ee964cb Mon Sep 17 00:00:00 2001 From: Mahe Tardy Date: Thu, 13 Jun 2024 17:09:23 +0200 Subject: [PATCH 3/4] pkg/sensors: reduce ratelimit map memory footprint This commit is very similar to 22510d98a16594b19b6ef8c68d3db5da5cb0a8cc For every ratelimit map loaded, we add ~10MB of kernel memory, and each kprobe added was adding a ratelimit map. We now only load that map if the user used the rateLimit field in a matchActions to reduce the memory footprint of this feature when unused. Signed-off-by: Mahe Tardy --- bpf/process/types/basic.h | 2 +- pkg/sensors/tracing/generickprobe.go | 30 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/bpf/process/types/basic.h b/bpf/process/types/basic.h index 084d9797ac4..3c234768e5f 100644 --- a/bpf/process/types/basic.h +++ b/bpf/process/types/basic.h @@ -1910,7 +1910,7 @@ struct ratelimit_value { struct { __uint(type, BPF_MAP_TYPE_LRU_HASH); - __uint(max_entries, 32768); + __uint(max_entries, 1); // Agent is resizing this if the feature is needed during kprobe load __type(key, struct ratelimit_key); __type(value, struct ratelimit_value); } ratelimit_map SEC(".maps"); diff --git a/pkg/sensors/tracing/generickprobe.go b/pkg/sensors/tracing/generickprobe.go index 785d280b4df..8a625b5cf41 100644 --- a/pkg/sensors/tracing/generickprobe.go +++ b/pkg/sensors/tracing/generickprobe.go @@ -58,7 +58,10 @@ const ( CharBufErrorTooLarge = -3 CharBufSavedForRetprobe = -4 - stackTraceMapMaxEntries = 32768 // this 
value could be fine tuned + // The following values could be fine tuned if either of those features uses too + // much kernel memory when enabled. + stackTraceMapMaxEntries = 32768 + ratelimitMapMaxEntries = 32768 ) func kprobeCharBufErrorToString(e int32) string { @@ -138,6 +141,9 @@ type genericKprobe struct { // for each kprobe when using single kprobes. hasStackTrace bool + // is there ratelimit defined in the kprobe + hasRatelimit bool + customHandler eventhandler.Handler } @@ -272,6 +278,7 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E data := &genericKprobeData{} oneKprobeHasStackTrace := false + oneKprobeHasRatelimit := false for _, id := range multiIDs { gk, err := genericKprobeTableGet(id) @@ -282,6 +289,7 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E multiRetIDs = append(multiRetIDs, id) } oneKprobeHasStackTrace = oneKprobeHasStackTrace || gk.hasStackTrace + oneKprobeHasRatelimit = oneKprobeHasRatelimit || gk.hasRatelimit gk.data = data } @@ -346,6 +354,9 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E if kernels.EnableLargeProgs() { ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load) + if oneKprobeHasRatelimit { + ratelimitMap.SetMaxEntries(ratelimitMapMaxEntries) + } maps = append(maps, ratelimitMap) } @@ -796,6 +807,7 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt message: msgField, tags: tagsField, hasStackTrace: hasStackTrace, + hasRatelimit: selectorsHaveRateLimit(f.Selectors), } // Parse Filters into kernel filter logic @@ -907,6 +919,11 @@ func createKprobeSensorFromEntry(kprobeEntry *genericKprobe, sensorPath string, if kernels.EnableLargeProgs() { ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load) + if kprobeEntry.hasRatelimit { + // similarly as for stacktrace, we expand the max size only if
+ // needed to reduce the memory footprint when unused + ratelimitMap.SetMaxEntries(ratelimitMapMaxEntries) + } maps = append(maps, ratelimitMap) } @@ -1284,3 +1301,14 @@ func retprobeMerge(prev pendingEvent, curr pendingEvent) *tracing.MsgGenericKpro func (k *observerKprobeSensor) LoadProbe(args sensors.LoadProbeArgs) error { return loadGenericKprobeSensor(args.BPFDir, args.Load, args.Verbose) } + +func selectorsHaveRateLimit(selectors []v1alpha1.KProbeSelector) bool { + for _, selector := range selectors { + for _, matchAction := range selector.MatchActions { + if len(matchAction.RateLimit) > 0 { + return true + } + } + } + return false +} From 5c874886db9f7cd10c535d1c7ad19a3682bf4358 Mon Sep 17 00:00:00 2001 From: Kevin Sheldrake Date: Thu, 13 Jun 2024 18:29:35 +0100 Subject: [PATCH 4/4] pkg/sensors: add rate limit test Add a NoRateLimit test and a RateLimitTest. Signed-off-by: Kevin Sheldrake --- pkg/sensors/tracing/kprobe_test.go | 121 +++++++++++++++++++++++++++++ 1 file changed, 121 insertions(+) diff --git a/pkg/sensors/tracing/kprobe_test.go b/pkg/sensors/tracing/kprobe_test.go index 47f32d9c857..0bc60b81452 100644 --- a/pkg/sensors/tracing/kprobe_test.go +++ b/pkg/sensors/tracing/kprobe_test.go @@ -19,6 +19,7 @@ import ( "sync" "syscall" "testing" + "time" "unsafe" "github.com/cilium/ebpf" @@ -5783,6 +5784,126 @@ spec: assert.NoError(t, err) } +func testKprobeRateLimit(t *testing.T, rateLimit bool) { + hook := `apiVersion: cilium.io/v1alpha1 +kind: TracingPolicy +metadata: + name: "datagram" +spec: + kprobes: + - call: "ip_send_skb" + syscall: false + args: + - index: 1 + type: "skb" + label: "datagram" + selectors: + - matchArgs: + - index: 1 + operator: "DAddr" + values: + - "127.0.0.1" + - index: 1 + operator: "DPort" + values: + - "9468" + - index: 1 + operator: "Protocol" + values: + - "IPPROTO_UDP" +` + + if rateLimit { + hook += ` + matchActions: + - action: Post + rateLimit: "5" + rateLimitScope: "global" +` + } + + var doneWG, readyWG 
sync.WaitGroup + defer doneWG.Wait() + + ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime) + defer cancel() + + createCrdFile(t, hook) + obs, err := observertesthelper.GetDefaultObserverWithFile(t, ctx, testConfigFile, tus.Conf().TetragonLib) + if err != nil { + t.Fatalf("GetDefaultObserverWithFile error: %s", err) + } + observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs) + readyWG.Wait() + + server := "nc.openbsd" + cmdServer := exec.Command(server, "-unvlp", "9468", "-s", "127.0.0.1") + assert.NoError(t, cmdServer.Start()) + time.Sleep(1 * time.Second) + + // Generate 5 datagrams + socket, err := net.Dial("udp", "127.0.0.1:9468") + if err != nil { + fmt.Printf("ERROR dialing socket\n") + panic(err) + } + + for i := 0; i < 5; i++ { + _, err := socket.Write([]byte("data")) + if err != nil { + fmt.Printf("ERROR writing to socket\n") + panic(err) + } + } + + kpChecker := ec.NewProcessKprobeChecker("datagram-checker"). + WithFunctionName(sm.Full("ip_send_skb")). + WithArgs(ec.NewKprobeArgumentListMatcher(). + WithOperator(lc.Ordered). + WithValues( + ec.NewKprobeArgumentChecker().WithLabel(sm.Full("datagram")). + WithSkbArg(ec.NewKprobeSkbChecker(). + WithDaddr(sm.Full("127.0.0.1")). + WithDport(9468). + WithProtocol(sm.Full("IPPROTO_UDP")), + ), + )) + + var checkerSuccess *ec.UnorderedEventChecker + var checkerFailure *ec.UnorderedEventChecker + if rateLimit { + // Rate limit. We should have 1. We shouldn't have 2 (or more) + checkerSuccess = ec.NewUnorderedEventChecker(kpChecker) + checkerFailure = ec.NewUnorderedEventChecker(kpChecker, kpChecker) + } else { + // No rate limit. We should have 5. We shouldn't have 6. 
+ checkerSuccess = ec.NewUnorderedEventChecker(kpChecker, kpChecker, kpChecker, kpChecker, kpChecker) + checkerFailure = ec.NewUnorderedEventChecker(kpChecker, kpChecker, kpChecker, kpChecker, kpChecker, kpChecker) + } + cmdServer.Process.Kill() + + err = jsonchecker.JsonTestCheck(t, checkerSuccess) + assert.NoError(t, err) + err = jsonchecker.JsonTestCheckExpect(t, checkerFailure, true) + assert.NoError(t, err) +} + +func TestKprobeNoRateLimit(t *testing.T) { + if !kernels.EnableLargeProgs() { + t.Skip("Test requires kernel 5.4") + } + + testKprobeRateLimit(t, false) +} + +func TestKprobeRateLimit(t *testing.T) { + if !kernels.EnableLargeProgs() { + t.Skip("Test requires kernel 5.4") + } + + testKprobeRateLimit(t, true) +} + func TestKprobeListSyscallDupsRange(t *testing.T) { if !kernels.MinKernelVersion("5.3.0") { t.Skip("TestCopyFd requires at least 5.3.0 version")