tetragon: Assorted fixes #2748

Merged · 5 commits · Jul 31, 2024
13 changes: 13 additions & 0 deletions contrib/scripts/cgroup-rate-map.sh
@@ -0,0 +1,13 @@
#!/bin/sh
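# Usage: cgroup-rate-map.sh <pinned-map-path | map-id>
#
# Dumps the per-CPU cgroup rate map with bpftool and formats it with jq.
# Example invocations (the pin path below is an assumption, adjust it to
# your setup; the map can also be referenced by the id shown by
# `bpftool map show`):
#
#   ./contrib/scripts/cgroup-rate-map.sh /sys/fs/bpf/tetragon/cgroup_rate_map
#   ./contrib/scripts/cgroup-rate-map.sh 123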

map=$1
dump=

if [ -f "${map}" ]; then
    dump="pinned ${map}"
else
    dump="id ${map}"
fi

bpftool map dump ${dump} | \
jq .[] | jq -r '["id","cpu","curr","prev","rate","time","throttled"],[.key.id] + (.values[] | [.cpu] + (.value | [.curr,.prev,.rate,.time,.throttled])) | @tsv'
157 changes: 157 additions & 0 deletions docs/content/en/docs/concepts/cgroup-rate.md
@@ -0,0 +1,157 @@
---
title: "Cgroup rate throtling"
weight: 2
description: "Monitor and throttle cgroup events rate"
---

This page shows you how to configure per-cgroup rate monitoring.


## Concept

Tetragon monitors the event rate of each cgroup and throttles the cgroup
(stops posting its events) if the rate crosses the configured threshold.

A throttled cgroup is still monitored, and once its rate stays below the
limit again, the throttling stops and Tetragon resumes posting the
cgroup's events.

The throttle action generates the following events:

- `THROTTLE` start event is sent when the cgroup rate crosses the configured limit
- `THROTTLE` stop event is sent when the cgroup rate stays below the limit for 5 seconds

**NOTE** The threshold for a given cgroup is monitored *per CPU*.
When events are spread across multiple CPUs, they are throttled on a
CPU only when they cross the threshold on that CPU.

**NOTE** At the moment only the base sensor events are monitored and limited:
- `PROCESS_EXEC`
- `PROCESS_EXIT`
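
The cgroup names reported in throttle events (for example
`session-429.scope` in the examples below) can be cross-checked against
the cgroup of a running process; on a cgroup v2 system a quick way is:

```
# Show the cgroup of the current shell; the last path component is the
# name reported in throttle events (e.g. session-429.scope).
cat /proc/self/cgroup
```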


## Setup

The cgroup rate is configured with the `--cgroup-rate` option:

```
--cgroup-rate string
Base sensor events cgroup rate <events,interval> disabled by default
('1000,1s' means rate 1000 events per second)
```

- `--cgroup-rate=10,1s`

  sets the cgroup threshold to 10 events per second

- `--cgroup-rate=1000,1s`

  sets the cgroup threshold to 1000 events per second

- `--cgroup-rate=100,1m`

  sets the cgroup threshold to 100 events per minute

- `--cgroup-rate=10000,10m`

  sets the cgroup threshold to 10000 events per 10 minutes


## Events

The throttle events contain the following fields.

- `THROTTLE_START`

```json
{
  "process_throttle": {
    "type": "THROTTLE_START",
    "cgroup": "session-429.scope"
  },
  "node_name": "ubuntu-22",
  "time": "2024-07-26T13:07:43.178407128Z"
}
```

- `THROTTLE_STOP`

```json
"process_throttle": {
"type": "THROTTLE_STOP",
"cgroup": "session-429.scope"
},
"node_name": "ubuntu-22",
"time": "2024-07-26T13:07:55.501718877Z"
```
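
To pull only the throttle events out of the JSON stream, a small `jq`
filter over `tetra getevents` can be used; this is a sketch that assumes
the default JSON output shown above:

```
# Print just the throttle start/stop events from the event stream.
tetra getevents | jq 'select(.process_throttle != null) | .process_throttle'
```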


## Example

This example shows how to generate throttle events when cgroup rate monitoring is enabled.


- Start Tetragon with a cgroup rate of 10 events per second; a successful configuration is reported in the Tetragon log:

```
# tetragon --bpf-lib ./bpf/objs/ --cgroup-rate=10,1s
...
time="2024-07-26T13:33:19Z" level=info msg="Cgroup rate started (10/1s)"
...
```

- Spawn more than 10 events per second

```
$ while :; do sleep 0.001s; done
```

- The monitored events show the throttling:


```
$ tetra getevents -o compact
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
🧬 throttle START session-429.scope
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s
💥 exit ubuntu-22 /usr/bin/sleep 0.001s 0
🚀 process ubuntu-22 /usr/bin/sleep 0.001s

🧬 throttle STOP session-429.scope
```

When you stop the while loop in the other terminal, the `throttle STOP` event shown above arrives after 5 seconds.
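
The cgroup rate counters added alongside this feature (for example
`throttle_start`, `throttle_stop`, `delete`) are also exported as
Prometheus metrics. A rough way to watch them, assuming the metrics
server is enabled; the port and the exact metric name are assumptions:

```
# Assumes the metrics server was started (e.g. --metrics-server :2112);
# adjust the port and the grep pattern to your setup.
curl -s http://localhost:2112/metrics | grep -i cgroup_rate
```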


## Limitations

- The cgroup rate is monitored per CPU

- At the moment only base sensor and kprobe events are monitored and limited:
- `PROCESS_EXEC`
- `PROCESS_EXIT`
89 changes: 89 additions & 0 deletions docs/content/en/docs/concepts/persistent-enforcement.md
@@ -0,0 +1,89 @@
---
title: "Persistent enforcement"
weight: 2
description: "How to configure persistent enforcement"
---

This page shows you how to configure persistent enforcement.

## Concept

The idea of persistent enforcement is to let the enforcement policy keep
running even when its Tetragon process is gone.

This is configured with the `--keep-sensors-on-exit` option.

When the Tetragon process exits, the policy stays active because it is
pinned in the BPF filesystem (bpffs) under the `/sys/fs/bpf/tetragon`
directory (see the listing sketch below).

When a new Tetragon process starts, it performs the following actions:

- checks for an existing `/sys/fs/bpf/tetragon` directory and moves it to `/sys/fs/bpf/tetragon_old`
- sets up the configured policy
- removes the `/sys/fs/bpf/tetragon_old` directory
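
While the Tetragon process is down, the pinned objects that keep the
policy alive can be inspected directly; the exact contents depend on the
loaded policies, so the listing below is only illustrative:

```
# List the pinned programs and maps that keep enforcement active
# while tetragon itself is not running (contents vary per policy).
sudo ls /sys/fs/bpf/tetragon
```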

## Example

This example shows how persistent enforcement works with a simple tracing policy.

- Consider the following enforcement tracing policy, which kills any process that touches the `/tmp/tetragon` file.

```yaml
apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
  name: "enforcement"
spec:
  kprobes:
  - call: "fd_install"
    syscall: false
    args:
    - index: 0
      type: int
    - index: 1
      type: "file"
    selectors:
    - matchArgs:
      - index: 1
        operator: "Equal"
        values:
        - "/tmp/tetragon"
      matchActions:
      - action: Sigkill
```

- Spawn Tetragon with the above policy and the `--keep-sensors-on-exit` option.

```
# tetragon ... --keep-sensors-on-exit
```
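
The exact command line is elided above; as a sketch, assuming the
standalone `--tracing-policy` flag is used to load the policy file
(adjust to how you deploy Tetragon):

```
# A sketch only: the policy file name and the --tracing-policy flag
# usage are assumptions for a standalone run.
tetragon --bpf-lib ./bpf/objs/ \
         --tracing-policy ./enforcement.yaml \
         --keep-sensors-on-exit
```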

- Verify the enforcement policy is in place.

```
$ cat /tmp/tetragon
Killed
```

- Kill tetragon

```
time="2024-07-26T14:47:45Z" level=info msg="Perf ring buffer size (bytes)" percpu=68K total=272K
time="2024-07-26T14:47:45Z" level=info msg="Perf ring buffer events queue size (events)" size=63K
time="2024-07-26T14:47:45Z" level=info msg="Listening for events..."
^C
time="2024-07-26T14:50:50Z" level=info msg="Received signal interrupt, shutting down..."
time="2024-07-26T14:50:50Z" level=info msg="Listening for events completed." error="context canceled"
```

- Verify the enforcement policy is **STILL** in place.

```
$ cat /tmp/tetragon
Killed
```

## Limitations

At the moment no events are received while Tetragon is down; only the
enforcement stays in place.
41 changes: 40 additions & 1 deletion pkg/cgrouprate/cgrouprate.go
@@ -41,7 +41,9 @@ import (
)

const (
    aliveCnt            = 5
    cleanupInterval     = time.Minute
    cleanupInactiveTime = time.Minute
)

var (
@@ -62,6 +64,7 @@ type CgroupRate struct {
    opts    *option.CgroupRate
    hash    *program.Map
    cgroups map[uint64]string
    cleanup time.Duration
}

func newCgroupRate(
@@ -166,6 +169,42 @@ func (r *CgroupRate) processCgroups() {
    for _, id := range remove {
        delete(r.cgroups, id)
    }

    r.cleanupCgroups(last)
}

func (r *CgroupRate) cleanupCgroups(curr time.Duration) {
    if r.cleanup == 0 {
        r.cleanup = curr
        return
    }
    // Run the cleanup once per cleanupInterval time
    if curr-r.cleanup < cleanupInterval {
        return
    }
    r.cleanup = curr

    hash := r.hash.MapHandle
    key := processapi.CgroupRateKey{}
    values := make([]processapi.CgroupRateValue, bpf.GetNumPossibleCPUs())

    entries := hash.Iterate()
    for entries.Next(&key, &values) {
        remove := true
        // Remove values that are inactive for longer than cleanupInactiveTime time
        for _, val := range values {
            if time.Duration(val.Time)+cleanupInactiveTime > curr {
                remove = false
            }
        }
        if remove {
            if err := hash.Delete(key); err != nil {
                cgroupratemetrics.CgroupRateTotalInc(cgroupratemetrics.DeleteFail)
            } else {
                cgroupratemetrics.CgroupRateTotalInc(cgroupratemetrics.Delete)
            }
        }
    }
}

func (r *CgroupRate) processCgroup(id uint64, cgroup string, last uint64) bool {
4 changes: 4 additions & 0 deletions pkg/metrics/cgroupratemetrics/cgroupratemetrics.go
@@ -16,17 +16,21 @@ const (
    ThrottleStop
    LookupFail
    UpdateFail
    DeleteFail
    Check
    Process
    Delete
)

var totalLabelValues = map[CgroupRateType]string{
    ThrottleStart: "throttle_start",
    ThrottleStop:  "throttle_stop",
    LookupFail:    "lookup_fail",
    UpdateFail:    "update_fail",
    DeleteFail:    "delete_fail",
    Check:         "check",
    Process:       "process",
    Delete:        "delete",
}

func (e CgroupRateType) String() string {
1 change: 1 addition & 0 deletions pkg/sensors/load.go
@@ -249,6 +249,7 @@ func (s *Sensor) loadMaps(bpfDir string) error {
            "sensor": s.Name,
            "map":    m.Name,
            "path":   pinPath,
            "max":    m.Entries,
        }).Info("tetragon, map loaded.")
}
