Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

pkg/sensors: reduce ratelimit map memory footprint #2551

Merged
merged 4 commits into from
Jun 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions bpf/process/types/basic.h
Original file line number Diff line number Diff line change
Expand Up @@ -1891,6 +1891,7 @@ FUNC_INLINE void do_action_signal(int signal)
*/
#define KEY_BYTES_PER_ARG 40

#ifdef __LARGE_BPF_PROG
/* Rate limit scope. */
#define ACTION_RATE_LIMIT_SCOPE_THREAD 0
#define ACTION_RATE_LIMIT_SCOPE_PROCESS 1
Expand All @@ -1909,7 +1910,7 @@ struct ratelimit_value {

struct {
__uint(type, BPF_MAP_TYPE_LRU_HASH);
__uint(max_entries, 32768);
__uint(max_entries, 1); // Agent is resizing this if the feature is needed during kprobe load
__type(key, struct ratelimit_key);
__type(value, struct ratelimit_value);
} ratelimit_map SEC(".maps");
Expand All @@ -1932,7 +1933,6 @@ struct {
__type(value, __u8[sizeof(struct ratelimit_key) + 128]);
} ratelimit_ro_heap SEC(".maps");

#ifdef __LARGE_BPF_PROG
FUNC_INLINE bool
rate_limit(__u64 ratelimit_interval, __u64 ratelimit_scope, struct msg_generic_kprobe *e)
{
Expand Down
40 changes: 39 additions & 1 deletion pkg/sensors/tracing/generickprobe.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,10 @@ const (
CharBufErrorTooLarge = -3
CharBufSavedForRetprobe = -4

stackTraceMapMaxEntries = 32768 // this value could be fine tuned
// The following values could be fine-tuned if either of these features uses
// too much kernel memory when enabled.
stackTraceMapMaxEntries = 32768
ratelimitMapMaxEntries = 32768
)

func kprobeCharBufErrorToString(e int32) string {
Expand Down Expand Up @@ -138,6 +141,9 @@ type genericKprobe struct {
// for each kprobe when using single kprobes.
hasStackTrace bool

// is there ratelimit defined in the kprobe
hasRatelimit bool

customHandler eventhandler.Handler
}

Expand Down Expand Up @@ -272,6 +278,7 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E

data := &genericKprobeData{}
oneKprobeHasStackTrace := false
oneKprobeHasRatelimit := false

for _, id := range multiIDs {
gk, err := genericKprobeTableGet(id)
Expand All @@ -282,6 +289,7 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
multiRetIDs = append(multiRetIDs, id)
}
oneKprobeHasStackTrace = oneKprobeHasStackTrace || gk.hasStackTrace
oneKprobeHasRatelimit = oneKprobeHasRatelimit || gk.hasRatelimit
gk.data = data
}

Expand Down Expand Up @@ -344,6 +352,14 @@ func createMultiKprobeSensor(sensorPath, policyName string, multiIDs []idtable.E
maps = append(maps, socktrack)
}

if kernels.EnableLargeProgs() {
ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load)
if oneKprobeHasRatelimit {
ratelimitMap.SetMaxEntries(ratelimitMapMaxEntries)
}
maps = append(maps, ratelimitMap)
}

enforcerDataMap := enforcerMap(policyName, load)
maps = append(maps, enforcerDataMap)

Expand Down Expand Up @@ -791,6 +807,7 @@ func addKprobe(funcName string, f *v1alpha1.KProbeSpec, in *addKprobeIn) (id idt
message: msgField,
tags: tagsField,
hasStackTrace: hasStackTrace,
hasRatelimit: selectorsHaveRateLimit(f.Selectors),
}

// Parse Filters into kernel filter logic
Expand Down Expand Up @@ -900,6 +917,16 @@ func createKprobeSensorFromEntry(kprobeEntry *genericKprobe, sensorPath string,
maps = append(maps, socktrack)
}

if kernels.EnableLargeProgs() {
ratelimitMap := program.MapBuilderPin("ratelimit_map", sensors.PathJoin(sensorPath, "ratelimit_map"), load)
if kprobeEntry.hasRatelimit {
// As with the stack trace map, we expand the max size only when it is
// needed, to reduce the memory footprint when the feature is unused.
ratelimitMap.SetMaxEntries(ratelimitMapMaxEntries)
}
maps = append(maps, ratelimitMap)
}

enforcerDataMap := enforcerMap(kprobeEntry.policyName, load)
maps = append(maps, enforcerDataMap)

Expand Down Expand Up @@ -1274,3 +1301,14 @@ func retprobeMerge(prev pendingEvent, curr pendingEvent) *tracing.MsgGenericKpro
// LoadProbe loads the generic kprobe sensor by forwarding the BPF directory,
// the program to load and the verbosity level taken from args to
// loadGenericKprobeSensor, and returns that call's error unchanged.
func (k *observerKprobeSensor) LoadProbe(args sensors.LoadProbeArgs) error {
	bpfDir, prog, verbosity := args.BPFDir, args.Load, args.Verbose
	return loadGenericKprobeSensor(bpfDir, prog, verbosity)
}

// selectorsHaveRateLimit reports whether at least one match action, in any of
// the given selectors, has a non-empty RateLimit field. It returns false when
// selectors is empty or when no match action sets a rate limit.
func selectorsHaveRateLimit(selectors []v1alpha1.KProbeSelector) bool {
	for i := range selectors {
		actions := selectors[i].MatchActions
		for j := range actions {
			if len(actions[j].RateLimit) == 0 {
				continue
			}
			// One rate-limited action is enough; no need to look further.
			return true
		}
	}
	return false
}
121 changes: 121 additions & 0 deletions pkg/sensors/tracing/kprobe_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"sync"
"syscall"
"testing"
"time"
"unsafe"

"github.com/cilium/ebpf"
Expand Down Expand Up @@ -5783,6 +5784,126 @@ spec:
assert.NoError(t, err)
}

// testKprobeRateLimit loads a tracing policy that hooks ip_send_skb for UDP
// datagrams sent to 127.0.0.1:9468, sends five such datagrams to a local
// nc.openbsd server and then checks how many matching kprobe events were
// recorded.
//
// When rateLimit is true, a Post action with rateLimit "5" and a "global"
// rateLimitScope is appended to the policy; the test then expects one event
// to match and a second one not to. When rateLimit is false, five events are
// expected to match and a sixth one not to.
//
// NOTE(review): the YAML below is reproduced exactly as it appears in this
// view, where nesting indentation looks stripped — confirm against the
// repository copy before relying on it.
func testKprobeRateLimit(t *testing.T, rateLimit bool) {
	hook := `apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
name: "datagram"
spec:
kprobes:
- call: "ip_send_skb"
syscall: false
args:
- index: 1
type: "skb"
label: "datagram"
selectors:
- matchArgs:
- index: 1
operator: "DAddr"
values:
- "127.0.0.1"
- index: 1
operator: "DPort"
values:
- "9468"
- index: 1
operator: "Protocol"
values:
- "IPPROTO_UDP"
`

	if rateLimit {
		hook += `
matchActions:
- action: Post
rateLimit: "5"
rateLimitScope: "global"
`
	}

	var doneWG, readyWG sync.WaitGroup
	defer doneWG.Wait()

	ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime)
	defer cancel()

	createCrdFile(t, hook)
	obs, err := observertesthelper.GetDefaultObserverWithFile(t, ctx, testConfigFile, tus.Conf().TetragonLib)
	if err != nil {
		t.Fatalf("GetDefaultObserverWithFile error: %s", err)
	}
	observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs)
	readyWG.Wait()

	server := "nc.openbsd"
	cmdServer := exec.Command(server, "-unvlp", "9468", "-s", "127.0.0.1")
	// A failed Start leaves cmdServer.Process nil, so the test cannot go on:
	// the later Kill would dereference a nil pointer.
	if err := cmdServer.Start(); err != nil {
		t.Fatalf("starting %s: %s", server, err)
	}
	// Safety net for early exits: make sure the server never outlives the
	// test and that the child is reaped. On the normal path the process has
	// already been killed further down, so errors here are expected and
	// deliberately ignored.
	defer func() {
		_ = cmdServer.Process.Kill()
		_ = cmdServer.Wait()
	}()
	// Give the server some time to start before sending datagrams.
	time.Sleep(1 * time.Second)

	// Generate 5 datagrams
	socket, err := net.Dial("udp", "127.0.0.1:9468")
	if err != nil {
		// Fail only this test instead of panicking the whole test binary.
		t.Fatalf("dialing socket: %s", err)
	}
	defer socket.Close()

	for i := 0; i < 5; i++ {
		if _, err := socket.Write([]byte("data")); err != nil {
			t.Fatalf("writing to socket: %s", err)
		}
	}

	kpChecker := ec.NewProcessKprobeChecker("datagram-checker").
		WithFunctionName(sm.Full("ip_send_skb")).
		WithArgs(ec.NewKprobeArgumentListMatcher().
			WithOperator(lc.Ordered).
			WithValues(
				ec.NewKprobeArgumentChecker().WithLabel(sm.Full("datagram")).
					WithSkbArg(ec.NewKprobeSkbChecker().
						WithDaddr(sm.Full("127.0.0.1")).
						WithDport(9468).
						WithProtocol(sm.Full("IPPROTO_UDP")),
					),
			))

	var checkerSuccess *ec.UnorderedEventChecker
	var checkerFailure *ec.UnorderedEventChecker
	if rateLimit {
		// Rate limit. We should have 1. We shouldn't have 2 (or more)
		checkerSuccess = ec.NewUnorderedEventChecker(kpChecker)
		checkerFailure = ec.NewUnorderedEventChecker(kpChecker, kpChecker)
	} else {
		// No rate limit. We should have 5. We shouldn't have 6.
		checkerSuccess = ec.NewUnorderedEventChecker(kpChecker, kpChecker, kpChecker, kpChecker, kpChecker)
		checkerFailure = ec.NewUnorderedEventChecker(kpChecker, kpChecker, kpChecker, kpChecker, kpChecker, kpChecker)
	}

	// Stop the server now that all datagrams have been sent; a failure to
	// kill it is logged but does not fail the test.
	if err := cmdServer.Process.Kill(); err != nil {
		t.Logf("killing %s: %s", server, err)
	}

	err = jsonchecker.JsonTestCheck(t, checkerSuccess)
	assert.NoError(t, err)
	err = jsonchecker.JsonTestCheckExpect(t, checkerFailure, true)
	assert.NoError(t, err)
}

// TestKprobeNoRateLimit runs the datagram kprobe scenario without any rate
// limit action. It is skipped when kernels.EnableLargeProgs() reports false.
func TestKprobeNoRateLimit(t *testing.T) {
	if kernels.EnableLargeProgs() {
		testKprobeRateLimit(t, false)
		return
	}

	t.Skip("Test requires kernel 5.4")
}

// TestKprobeRateLimit runs the datagram kprobe scenario with the rate limit
// action enabled. It is skipped when kernels.EnableLargeProgs() reports false.
func TestKprobeRateLimit(t *testing.T) {
	if kernels.EnableLargeProgs() {
		testKprobeRateLimit(t, true)
		return
	}

	t.Skip("Test requires kernel 5.4")
}

func TestKprobeListSyscallDupsRange(t *testing.T) {
if !kernels.MinKernelVersion("5.3.0") {
t.Skip("TestCopyFd requires at least 5.3.0 version")
Expand Down
Loading