From 277c2e4d632d72ef2e20e0797857ea6d1ee79ee5 Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 18 Nov 2022 15:03:01 +0100 Subject: [PATCH 1/5] tetragon: add support for gRPC over unix socket [upstream commit a63d33497977a72caf4eda6c17a5df631be8ce8e] Now we can provide unix://abosolute_path arguments to both the client (tetra) and the agent (tetragon). Signed-off-by: Kornilios Kourtis --- cmd/tetragon/addr.go | 35 ++++++++++++++++++++++++++ cmd/tetragon/addr_test.go | 52 +++++++++++++++++++++++++++++++++++++++ cmd/tetragon/main.go | 16 +++++++----- 3 files changed, 97 insertions(+), 6 deletions(-) create mode 100644 cmd/tetragon/addr.go create mode 100644 cmd/tetragon/addr_test.go diff --git a/cmd/tetragon/addr.go b/cmd/tetragon/addr.go new file mode 100644 index 00000000000..50194bf2fcb --- /dev/null +++ b/cmd/tetragon/addr.go @@ -0,0 +1,35 @@ +package main + +import ( + "fmt" + "path/filepath" + "strings" +) + +// splitListenAddr splits the user-provided address a to a proto and an address field to be used +// with net.Listen. +// +// addresses can be: +// unix://absolute_path for unix sockets +// : for TCP (more specifically, an address that can be passed to net.Listen) +// +// Note that the client (tetra) uses https://github.com/grpc/grpc-go/blob/v1.51.0/clientconn.go#L135 +// With the syntax is documented in https://github.com/grpc/grpc/blob/master/doc/naming.md. The +// server uses net.Listen. And so the two are not compatible because the client expects "ipv4" or +// "ipv6" for tcp connections. +// Hence, because we want the same string to work the same way both on the client and the server, we +// only support the two addresses above. +func splitListenAddr(arg string) (string, string, error) { + + if strings.HasPrefix(arg, "unix://") { + path := strings.TrimPrefix(arg, "unix://") + if !filepath.IsAbs(path) { + return "", "", fmt.Errorf("path %s (%s) is not absolute", path, arg) + } + return "unix", path, nil + } + + // assume everything else is TCP to support strings such as "localhost:51234" and let + // net.Listen figure things out. + return "tcp", arg, nil +} diff --git a/cmd/tetragon/addr_test.go b/cmd/tetragon/addr_test.go new file mode 100644 index 00000000000..0eb76482ba5 --- /dev/null +++ b/cmd/tetragon/addr_test.go @@ -0,0 +1,52 @@ +package main + +import "testing" + +func TestSplitListenAddr(t *testing.T) { + type testCase struct { + arg string + + expectedErr bool + proto, addr string + } + testCases := []testCase{ + { + arg: "unix:///var/run/tetragon/tetragon.sock", + proto: "unix", + addr: "/var/run/tetragon/tetragon.sock", + }, { + arg: "localhost:51234", + proto: "tcp", + addr: "localhost:51234", + }, { + // NB: expect error on relative paths + arg: "unix://var/run/tetragon/tetragon.sock", + expectedErr: true, + }, + } + + for _, c := range testCases { + proto, addr, err := splitListenAddr(c.arg) + if c.expectedErr { + if err == nil { + t.Fatalf("expected error for %s", c.arg) + } + continue + } + + if err != nil { + t.Fatalf("unexpected error for %s: %s", c.arg, err) + } + + if proto != c.proto { + t.Fatalf("Proto (%s) did not match expected value (%s) for %s", proto, c.proto, c.arg) + } + + if addr != c.addr { + t.Fatalf("Addr (%s) did not match expected value (%s) for %s", addr, c.addr, c.arg) + } + + t.Logf("case %+v is OK!", c) + } + +} diff --git a/cmd/tetragon/main.go b/cmd/tetragon/main.go index 8c6cbf9fd3f..b348239c867 100644 --- a/cmd/tetragon/main.go +++ b/cmd/tetragon/main.go @@ -359,19 +359,23 @@ func startExporter(ctx context.Context, server *server.Server) error { return nil } -func Serve(ctx context.Context, address string, server *server.Server) error { +func Serve(ctx context.Context, listenAddr string, server *server.Server) error { grpcServer := grpc.NewServer() tetragon.RegisterFineGuidanceSensorsServer(grpcServer, server) - go func(address string) { - listener, err := net.Listen("tcp", address) + proto, addr, err := splitListenAddr(listenAddr) + if err != nil { + return fmt.Errorf("failed to parse listen address: %w", err) + } + go func(proto, addr string) { + listener, err := net.Listen(proto, addr) if err != nil { - log.WithError(err).WithField("address", address).Fatal("Failed to start gRPC server") + log.WithError(err).WithField("protocol", proto).WithField("address", addr).Fatal("Failed to start gRPC server") } - log.WithField("address", address).Info("Starting gRPC server") + log.WithField("address", addr).WithField("protocol", proto).Info("Starting gRPC server") if err = grpcServer.Serve(listener); err != nil { log.WithError(err).Error("Failed to close gRPC server") } - }(address) + }(proto, addr) go func() { <-ctx.Done() grpcServer.Stop() From 1238bbfb2df132240b14e21dc59834a3462cd64e Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Tue, 22 Nov 2022 08:46:45 +0100 Subject: [PATCH 2/5] helm: remove grpc.port and fix livenessProbe [upstream commit 3c98248972ffb7e65437ac6a5bd82de9535e1935] Up until now, configuring gRPC listen address for the agent via the helm chart used two variables grpc.port and grpc.address. The previous patch added support for passing a unix socket address to the agent, which does not match well the grpc.address:grpc.port configuration. This patch removes the grpc.port helm variable, and, instead, relies only on grpc.address for configuration. Users can use "localhost:54321", ":54321", or "unix:///var/run/tetragon/tetragon.sock" to configure the gRPC address that the agent listens to. Furthermore, the livenessProbe of the agent relies on checking health status via the gRPC interface via the CLI (tetra status). This patch also fixes the livenessProbe so that: - it is only defined, if grpc is enabled - the proper gRPC address is used to contact the agent Signed-off-by: Kornilios Kourtis --- install/kubernetes/README.md | 3 +-- install/kubernetes/templates/_container_tetragon.tpl | 12 ++++++++---- install/kubernetes/templates/tetragon_configmap.yaml | 2 +- install/kubernetes/values.yaml | 6 ++---- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/install/kubernetes/README.md b/install/kubernetes/README.md index aac24fc163c..219326b37d9 100644 --- a/install/kubernetes/README.md +++ b/install/kubernetes/README.md @@ -64,9 +64,8 @@ Helm chart for Tetragon | tetragon.fieldFilters | string | `"{}"` | | | tetragon.gops.address | string | `"localhost"` | The address at which to expose gops. | | tetragon.gops.port | int | `8118` | The port at which to expose gops. | -| tetragon.grpc.address | string | `"localhost"` | The address at which to expose gRPC. Set it to "" to listen on all available interfaces. | +| tetragon.grpc.address | string | `"localhost:54321"` | The address at which to expose gRPC. Examples: localhost:54321, unix:///var/run/tetragon/tetragon.sock | | tetragon.grpc.enabled | bool | `true` | Whether to enable exposing Tetragon gRPC. | -| tetragon.grpc.port | int | `54321` | The port at which to expose gRPC. | | tetragon.image.override | string | `nil` | | | tetragon.image.repository | string | `"quay.io/cilium/tetragon"` | | | tetragon.image.tag | string | `"v0.8.3"` | | diff --git a/install/kubernetes/templates/_container_tetragon.tpl b/install/kubernetes/templates/_container_tetragon.tpl index d3a8b75c9ab..47089f93aea 100644 --- a/install/kubernetes/templates/_container_tetragon.tpl +++ b/install/kubernetes/templates/_container_tetragon.tpl @@ -68,11 +68,15 @@ resources: {{- toYaml . | nindent 4 }} {{- end }} +{{- if .Values.tetragon.grpc.enabled }} livenessProbe: - exec: - command: - - tetra - - status + exec: + command: + - tetra + - status + - --server-address + - {{ .Values.tetragon.grpc.address }} +{{- end -}} {{- end -}} {{- define "container.tetragon.init-operator" -}} diff --git a/install/kubernetes/templates/tetragon_configmap.yaml b/install/kubernetes/templates/tetragon_configmap.yaml index 5d63bbfd273..c39c9c415ef 100644 --- a/install/kubernetes/templates/tetragon_configmap.yaml +++ b/install/kubernetes/templates/tetragon_configmap.yaml @@ -39,7 +39,7 @@ data: metrics-server: "" {{- end }} {{- if .Values.tetragon.grpc.enabled }} - server-address: {{ .Values.tetragon.grpc.address }}:{{ .Values.tetragon.grpc.port }} + server-address: {{ .Values.tetragon.grpc.address }} {{- else }} {{- end }} {{- if .Values.tetragon.tcpStatsSampleSegs }} diff --git a/install/kubernetes/values.yaml b/install/kubernetes/values.yaml index 5bbb65c42ab..2402f3827a1 100644 --- a/install/kubernetes/values.yaml +++ b/install/kubernetes/values.yaml @@ -134,10 +134,8 @@ tetragon: grpc: # -- Whether to enable exposing Tetragon gRPC. enabled: true - # -- The address at which to expose gRPC. Set it to "" to listen on all available interfaces. - address: "localhost" - # -- The port at which to expose gRPC. - port: 54321 + # -- The address at which to expose gRPC. Examples: localhost:54321, unix:///var/run/tetragon/tetragon.sock + address: "localhost:54321" gops: # -- The address at which to expose gops. address: "localhost" From f69446ce98ffa52512157b1b66921264a86af78a Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 25 Nov 2022 11:24:37 +0100 Subject: [PATCH 3/5] tetragon: chmod unix socket to 0660 [upstream commit: 9aae81d74cecb66bc4ca802396f0aa074a1e790f] This patch ensures that the control unix socket for the agent has 0660 file permissions. Signed-off-by: Kornilios Kourtis --- cmd/tetragon/main.go | 9 +++++- pkg/unixlisten/unixlisten.go | 57 ++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) create mode 100644 pkg/unixlisten/unixlisten.go diff --git a/cmd/tetragon/main.go b/cmd/tetragon/main.go index b348239c867..cbb694c68e2 100644 --- a/cmd/tetragon/main.go +++ b/cmd/tetragon/main.go @@ -36,6 +36,7 @@ import ( "github.com/cilium/tetragon/pkg/sensors/base" "github.com/cilium/tetragon/pkg/sensors/program" "github.com/cilium/tetragon/pkg/server" + "github.com/cilium/tetragon/pkg/unixlisten" "github.com/cilium/tetragon/pkg/version" "github.com/cilium/tetragon/pkg/watcher" "github.com/cilium/tetragon/pkg/watcher/crd" @@ -367,7 +368,13 @@ func Serve(ctx context.Context, listenAddr string, server *server.Server) error return fmt.Errorf("failed to parse listen address: %w", err) } go func(proto, addr string) { - listener, err := net.Listen(proto, addr) + var listener net.Listener + var err error + if proto == "unix" { + listener, err = unixlisten.ListenWithRename(addr, 0660) + } else { + listener, err = net.Listen(proto, addr) + } if err != nil { log.WithError(err).WithField("protocol", proto).WithField("address", addr).Fatal("Failed to start gRPC server") } diff --git a/pkg/unixlisten/unixlisten.go b/pkg/unixlisten/unixlisten.go new file mode 100644 index 00000000000..c7b10bfaa34 --- /dev/null +++ b/pkg/unixlisten/unixlisten.go @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright Authors of Tetragon +package unixlisten + +import ( + "fmt" + "net" + "os" + "path/filepath" +) + +// ListenWithRename creates a "unix" listener for the given path and the given mode +// +// Go's net.Listen() performs three system calls at once: +// - socket, where the file descriptor is created +// - bind, where the unix socket file is created +// - listen, where the socket can now accept connections +// +// Hence, doing a chmod(2) after Listen is racy because a client can connect +// between the listen(2) and the chmod(2) calls. One solution would be to use +// umask(2), but this is tricky to do in a multi-threaded program because it +// affects other files being created from different threads. Also, other +// threads may change the umask. +// +// This function, instead, creates the socket file in a private directory, +// performs the appropriate chmod and only then moves the file to its original +// location. Not sure about other systems, but at least on Linux renaming a +// unix socket file after the listen seems to work without issues. +func ListenWithRename(path string, mode os.FileMode) (net.Listener, error) { + os.Remove(path) + + baseName := filepath.Base(path) + dirName := filepath.Dir(path) + + // Create a temporary directory: MkdirTemp creates the directory with 0700 + tmpDir, err := os.MkdirTemp(dirName, fmt.Sprintf("%s-dir-*", baseName)) + if err != nil { + return nil, err + } + defer os.RemoveAll(tmpDir) + + tmpPath := filepath.Join(tmpDir, baseName) + l, err := net.Listen("unix", tmpPath) + if err != nil { + return nil, err + } + + if err := os.Chmod(tmpPath, mode); err != nil { + return nil, err + } + + err = os.Rename(tmpPath, path) + if err != nil { + return nil, err + } + return l, nil +} From 6d1c6e1e9130559b84848133c0b3b05902e57e9e Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 25 Nov 2022 12:37:21 +0100 Subject: [PATCH 4/5] Makefile.cli: deal with {g,u}id collision [ upstream commit c338a633b4818dba9ba327188531da4a1f76c9f7 ] CLI builds have been failing with: > addgroup: gid '123' in use Fix this by redoing the addgroup, adduser command without a specific gid/uid. Signed-off-by: Kornilios Kourtis --- Makefile.cli | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.cli b/Makefile.cli index 796c3704053..c18d49e6495 100644 --- a/Makefile.cli +++ b/Makefile.cli @@ -13,8 +13,8 @@ cli-release: --workdir /tetragon \ --volume `pwd`:/tetragon docker.io/library/golang:1.19.0-alpine3.16 \ sh -c "apk add --no-cache make git && \ - addgroup -g $(RELEASE_GID) release && \ - adduser -u $(RELEASE_UID) -D -G release release && \ + (addgroup -g $(RELEASE_GID) release || addgroup release )&& \ + (adduser -u $(RELEASE_UID) -D -G release release || adduser -D -G release release )&& \ su release -c 'make cli-local-release VERSION=${VERSION}'" cli-local-release: cli-clean From 96bac9b04adbadfd702e58e256182b2b78fd193f Mon Sep 17 00:00:00 2001 From: Kornilios Kourtis Date: Fri, 9 Dec 2022 15:54:39 +0100 Subject: [PATCH 5/5] Makefile: Fix potential uid/gid collision by using setpriv [upstream commit 0fe4085d2d69b75602f82c9c4d734f9e7b8224e5] Initial fix for for this issue had the issue that the uid/gid of the generated artefact would not match the uid/gid of the `make` caller. Fix this using setpriv. This approach was taken from 69822cd7789f368f280870429ba833005671316e in cilum/hubble, so copying the excellent message of above commit from Sebastian: """ The release build should run with the same numeric user and group id as the caller of the Makefile. This solves two problems: 1. The generated artefacts should be owned by the user who invoked the Makefile 2. `go build -buildvcs=true` (the default since Go 1.18) invokes git, and git requires that the .git folder is owned by the user invoking it. Previously, we achieved this by creating a new user and group inside the container with the wanted uid/gid. The problem with that approach is it fails if the uid/gid is already taken (such as e.g. gid 123, which seems to be used by the GitHub runner as of recently). Therefore, instead of trying to create a new user, this commit now uses `setpriv` from util-linux to force the `make` process (and all its children) to run as the RELEASE_{UID/GID}. This ensures that both `git` can access the `.git` directory owned by the host user, and that the host user can access the generated artifacts. One limitation of `setpriv` is that it runs the child process without an accessible `$HOME` directory, which `go build` needs for the GOCACHE. To solve this for now, we point it to a temporary directory. In the future, we could consider using a GOCACHE owned by the host, to allow cache-reuse across builds. """ fixes: c338a633b4818dba9ba327188531da4a1f76c9f7 Signed-off-by: Kornilios Kourtis --- Makefile.cli | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/Makefile.cli b/Makefile.cli index c18d49e6495..bda8f7a8a9f 100644 --- a/Makefile.cli +++ b/Makefile.cli @@ -12,10 +12,9 @@ cli-release: --rm \ --workdir /tetragon \ --volume `pwd`:/tetragon docker.io/library/golang:1.19.0-alpine3.16 \ - sh -c "apk add --no-cache make git && \ - (addgroup -g $(RELEASE_GID) release || addgroup release )&& \ - (adduser -u $(RELEASE_UID) -D -G release release || adduser -D -G release release )&& \ - su release -c 'make cli-local-release VERSION=${VERSION}'" + sh -c "apk add --no-cache make git setpriv && \ + /usr/bin/setpriv --reuid=$(RELEASE_UID) --regid=$(RELEASE_GID) --clear-groups \ + make GOCACHE=/tmp/cache cli-local-release VERSION=${VERSION}" cli-local-release: cli-clean set -o errexit; \