From 9a8f8927987750ea8057845b4453fa040ee05976 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 26 Sep 2023 20:00:53 +0000 Subject: [PATCH] tetragon: Add test for exit events race The previous commit fixes the exit event race that might cause tetragon to receive multiple exit events with the same pid value. The contrib/tester-progs/threads-exit program tries to exploit this by creating multiple threads and synchronizing all their exit calls so it's likely to hit the race window. The TestEventExitThreads test itself spawns several executions of the threads-exit program (to push the luck a bit and hit the race window at least once), records their pid values, and then checks that we receive a single exit event for any given pid value. Signed-off-by: Jiri Olsa --- contrib/tester-progs/Makefile | 6 ++- contrib/tester-progs/threads-exit.c | 71 ++++++++++++++++++++++++++++ pkg/sensors/exec/exec_test.go | 73 +++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 contrib/tester-progs/threads-exit.c diff --git a/contrib/tester-progs/Makefile b/contrib/tester-progs/Makefile index 96cc038f105..e2a67f1cbd6 100644 --- a/contrib/tester-progs/Makefile +++ b/contrib/tester-progs/Makefile @@ -16,7 +16,8 @@ PROGS = sigkill-tester \ uprobe-test-2 \ lseek-pipe \ threads-tester \ - bench-reader + bench-reader \ + threads-exit all: $(PROGS) @@ -29,6 +30,9 @@ bench-reader: bench-reader.c threads-tester: threads-tester.c $(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread +threads-exit: threads-exit.c + $(GCC) -Wall -fno-inline $< -o $@ -lcap -lpthread + capabilities-tester: capabilities-tester.c $(GCC) -Wall $< -o $@ -lcap diff --git a/contrib/tester-progs/threads-exit.c b/contrib/tester-progs/threads-exit.c new file mode 100644 index 00000000000..2614c576d06 --- /dev/null +++ b/contrib/tester-progs/threads-exit.c @@ -0,0 +1,71 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int goo; + 
+static void *worker(void *ctx) +{ + int ready_out = (intptr_t) ctx; + + write(ready_out, "R", 1); + + while (!goo) {} + syscall(SYS_exit, 0); + return NULL; +} + +int main(void) +{ + int ncpus = get_nprocs(), nthreads = ncpus * 10; + int i, err, readyfds[2]; + pthread_t th[nthreads]; + cpu_set_t set; + char dummy; + + /* make sure we can run on all cpus */ + CPU_ZERO(&set); + for (i = 0; i < ncpus; i++) + CPU_SET(i, &set); + if (sched_setaffinity(0, sizeof(set), &set) == -1) { + perror("sched_setaffinity"); + return -1; + } + + + if (pipe(readyfds)) { + perror("pipe"); + return -1; + } + + /* print out group leader for test checker */ + printf("TGID %d\n", getpid()); + fflush(NULL); + + for (i = 0; i < nthreads; i++) { + err = pthread_create(&th[i], NULL, worker, (void*)(intptr_t) readyfds[1]); + if (err) { + perror("pthread_create"); + return -1; + } + } + + /* Make sure all threads started.. */ + for (i = 0; i < nthreads; i++) { + if (read(readyfds[0], &dummy, 1) != 1) { + perror("read"); + return -1; + } + } + + /* .. 
and then tell threads to exit */ + goo = 1; + syscall(SYS_exit, 0); +} diff --git a/pkg/sensors/exec/exec_test.go b/pkg/sensors/exec/exec_test.go index 3ad1435dd65..e3532fa237b 100644 --- a/pkg/sensors/exec/exec_test.go +++ b/pkg/sensors/exec/exec_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/cilium/ebpf" + "github.com/cilium/tetragon/api/v1/tetragon" ec "github.com/cilium/tetragon/api/v1/tetragon/codegen/eventchecker" "github.com/cilium/tetragon/pkg/api" "github.com/cilium/tetragon/pkg/api/dataapi" @@ -154,6 +155,78 @@ func TestNamespaces(t *testing.T) { assert.NoError(t, err) } +func TestEventExitThreads(t *testing.T) { + var doneWG, readyWG sync.WaitGroup + defer doneWG.Wait() + + ctx, cancel := context.WithTimeout(context.Background(), tus.Conf().CmdWaitTime) + defer cancel() + + obs, err := observertesthelper.GetDefaultObserver(t, ctx, tus.Conf().TetragonLib, observertesthelper.WithMyPid()) + if err != nil { + t.Fatalf("Failed to run observer: %s", err) + } + observertesthelper.LoopEvents(ctx, t, &doneWG, &readyWG, obs) + readyWG.Wait() + + testThreadsExit := testutils.RepoRootPath("contrib/tester-progs/threads-exit") + + // map of all pids we should receive in exit events + tgids := make(map[int]bool) + + // running the workload 10 times to make the chance we hit the race + // window bigger and collect all tgids from testThreadsExit output + for i := 0; i < 10; i++ { + out, err := exec.Command(testThreadsExit).Output() + if err != nil { + t.Fatalf("Failed to execute test binary: %s\n", err) + } + + tgid := 0 + if n, err := fmt.Sscanf(string(out[:]), "TGID %d", &tgid); n != 1 || err != nil { + t.Fatalf("Failed to parse test binary output: %s\n", err) + } + tgids[tgid] = false + } + + // check we got a single exit event for each testThreadsExit + // execution and no more + nextCheck := func(event ec.Event, l *logrus.Logger) (bool, error) { + switch ev := event.(type) { + case *tetragon.ProcessExit: + if ev.Process.Binary != testThreadsExit { + return 
false, nil + } + // Make sure there's only a single exit event with the given pid + pid := int(ev.Process.Pid.GetValue()) + assert.False(t, tgids[pid], "got extra exit event with pid %d", pid) + tgids[pid] = true + return false, nil + default: + return false, nil + + } + } + + finalCheck := func(l *logrus.Logger) error { + // Make sure we saw all pids + for pid, used := range tgids { + assert.True(t, used, "did not see exit event for pid %d", pid) + } + return nil + } + + checker_ := ec.FnEventChecker{ + NextCheckFn: nextCheck, + FinalCheckFn: finalCheck, + } + + checker := testsensor.NewTestChecker(&checker_) + + err = jsonchecker.JsonTestCheck(t, checker) + assert.NoError(t, err) +} + func TestEventExecve(t *testing.T) { var doneWG, readyWG sync.WaitGroup defer doneWG.Wait()