From e01d7e1f9a64750a75ba419eabf8a94463622790 Mon Sep 17 00:00:00 2001 From: Chris Henzie Date: Mon, 10 Aug 2020 07:09:55 -0700 Subject: [PATCH] HTTP server for liveness probe, check registration socket exists --- README.md | 14 ++++++++ cmd/csi-node-driver-registrar/main.go | 1 + .../node_register.go | 30 ++++++++++++++++ pkg/util/util_linux.go | 20 ++++++++--- pkg/util/util_test.go | 36 +++++++++++++++++++ pkg/util/util_windows.go | 24 ++++++++----- 6 files changed, 112 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index e44fd7202..6d221f6fd 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,11 @@ There are two UNIX domain sockets used by the node-driver-registrar: `/var/lib/kubelet/plugins//csi.sock). Note this is NOT the path to the registration socket. +### Optional arguments + +* `--health-port`: This is the port of the health check server for the node-driver-registrar. + A value of 0 will disable the server. + ### Required permissions The node-driver-registrar does not interact with the Kubernetes API, so no RBAC @@ -76,11 +81,19 @@ the actual driver's name. args: - "--csi-address=/csi/csi.sock" - "--kubelet-registration-path=/var/lib/kubelet/plugins//csi.sock" + - "--health-port=9809" volumeMounts: - name: plugin-dir mountPath: /csi - name: registration-dir mountPath: /registration + ports: + - containerPort: 9809 + name: healthz + livenessProbe: + httpGet: + path: /healthz + port: healthz volumes: - name: registration-dir hostPath: @@ -106,3 +119,4 @@ You can reach the maintainers of this project at: ### Code of conduct Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md). + diff --git a/cmd/csi-node-driver-registrar/main.go b/cmd/csi-node-driver-registrar/main.go index 7c8aa1cf7..1a7e54bfe 100644 --- a/cmd/csi-node-driver-registrar/main.go +++ b/cmd/csi-node-driver-registrar/main.go @@ -47,6 +47,7 @@ var ( connectionTimeout = flag.Duration("connection-timeout", 0, "The --connection-timeout flag is deprecated") csiAddress = flag.String("csi-address", "/run/csi/socket", "Path of the CSI driver socket that the node-driver-registrar will connect to.") kubeletRegistrationPath = flag.String("kubelet-registration-path", "", "Path of the CSI driver socket on the Kubernetes host machine.") + healthzPort = flag.Int("health-port", 0, "TCP port for healthz requests. Set to 0 to disable the healthz server.") showVersion = flag.Bool("version", false, "Show version.") version = "unknown" diff --git a/cmd/csi-node-driver-registrar/node_register.go b/cmd/csi-node-driver-registrar/node_register.go index 8656b7b33..0d245631a 100644 --- a/cmd/csi-node-driver-registrar/node_register.go +++ b/cmd/csi-node-driver-registrar/node_register.go @@ -19,9 +19,11 @@ package main import ( "fmt" "net" + "net/http" "os" "os/signal" "runtime" + "strconv" "syscall" "google.golang.org/grpc" @@ -64,6 +66,7 @@ func nodeRegister( // Registers kubelet plugin watcher api. registerapi.RegisterRegistrationServer(grpcServer, registrar) + go healthzServer(socketPath, *healthzPort) go removeRegSocket(csiDriverName) // Starts service if err := grpcServer.Serve(lis); err != nil { @@ -78,6 +81,33 @@ func buildSocketPath(csiDriverName string) string { return fmt.Sprintf("/registration/%s-reg.sock", csiDriverName) } +func healthzServer(socketPath string, port int) { + if port == 0 { + klog.Infof("Skipping healthz server because port set to: %v", port) + return + } + klog.Infof("Starting healthz server at: :%v\n", port) + + http.HandleFunc("/healthz", func(w http.ResponseWriter, req *http.Request) { + socketExists, err := util.DoesSocketExist(socketPath) + if err == nil && socketExists { + w.WriteHeader(http.StatusOK) + w.Write([]byte(`ok`)) + klog.V(5).Infof("health check succeeded") + } else if err != nil { + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte(err.Error())) + klog.Errorf("health check failed: %+v", err) + } else if !socketExists { + w.WriteHeader(http.StatusInternalServerError) + w.Write([]byte("registration socket does not exist")) + klog.Errorf("health check failed, registration socket does not exist") + } + }) + + klog.Fatal(http.ListenAndServe(":"+strconv.Itoa(port), nil)) +} + func removeRegSocket(csiDriverName string) { sigc := make(chan os.Signal, 1) signal.Notify(sigc, syscall.SIGTERM) diff --git a/pkg/util/util_linux.go b/pkg/util/util_linux.go index 317955245..00ccbbd39 100644 --- a/pkg/util/util_linux.go +++ b/pkg/util/util_linux.go @@ -30,15 +30,25 @@ func Umask(mask int) (int, error) { } func CleanupSocketFile(socketPath string) error { - fi, err := os.Stat(socketPath) - if err == nil && (fi.Mode()&os.ModeSocket) != 0 { - // Remove any socket, stale or not, but fall through for other files + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + return err + } + if socketExists { if err := os.Remove(socketPath); err != nil { return fmt.Errorf("failed to remove stale socket %s with error: %+v", socketPath, err) } } + return nil +} + +func DoesSocketExist(socketPath string) (bool, error) { + fi, err := os.Stat(socketPath) + if err == nil && (fi.Mode()&os.ModeSocket) != 0 { + return true, nil + } if err != nil && !os.IsNotExist(err) { - return fmt.Errorf("failed to stat the socket %s with error: %+v", socketPath, err) + return false, fmt.Errorf("failed to stat the socket %s with error: %+v", socketPath, err) } - return nil + return false, nil } diff --git a/pkg/util/util_test.go b/pkg/util/util_test.go index 4b18f2fe2..9a37e65fb 100644 --- a/pkg/util/util_test.go +++ b/pkg/util/util_test.go @@ -37,7 +37,15 @@ func TestSocketFileDoesNotExist(t *testing.T) { t.Fatalf("could not create temp dir: %v", err) } defer os.RemoveAll(testDir) + socketPath := filepath.Join(testDir, socketFileName) + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + t.Fatalf("check for existence returned error: %+v", err) + } + if socketExists { + t.Fatalf("socket exists when it should not") + } // Negative test, file is not created. So file name in current path used. err = CleanupSocketFile(socketPath) if err != nil { @@ -56,6 +64,13 @@ func TestSocketPathDoesNotExist(t *testing.T) { os.RemoveAll(testDir) socketPath := filepath.Join(testDir, socketFileName) + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + t.Fatalf("check for existence returned error: %+v", err) + } + if socketExists { + t.Fatalf("socket exists when it should not") + } err = CleanupSocketFile(socketPath) if err != nil { t.Fatalf("cleanup returned error: %+v", err) @@ -79,6 +94,14 @@ func TestSocketPathSimple(t *testing.T) { os.Exit(1) } + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + t.Fatalf("check for existence returned error: %+v", err) + } + if !socketExists { + t.Fatalf("socket does not exist when it should") + } + err = CleanupSocketFile(socketPath) if err != nil { t.Fatalf("cleanup returned error: %+v", err) @@ -110,6 +133,19 @@ func TestSocketRegularFile(t *testing.T) { } f.Close() + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + t.Fatalf("check for existence returned error: %+v", err) + } + // See comments in CleanupSocketFile for differences in windows and linux behavior checking for sockets. + if runtime.GOOS == "windows" { + if !socketExists { + t.Fatalf("socket does not exist when it should") + } + } else if socketExists { + t.Fatalf("socket exists when it should not") + } + err = CleanupSocketFile(socketPath) if err != nil { t.Fatalf("cleanup returned error: %+v", err) diff --git a/pkg/util/util_windows.go b/pkg/util/util_windows.go index faa1e0004..2d2c8bb97 100644 --- a/pkg/util/util_windows.go +++ b/pkg/util/util_windows.go @@ -29,19 +29,27 @@ func Umask(mask int) (int, error) { } func CleanupSocketFile(socketPath string) error { - if _, err := os.Lstat(socketPath); err != nil { - // If the file does not exist, then the cleanup can be considered successful. - if os.IsNotExist(err) { - return nil + socketExists, err := DoesSocketExist(socketPath) + if err != nil { + return err + } + if socketExists { + if err := os.Remove(socketPath); err != nil { + return fmt.Errorf("failed to remove stale socket %s with error: %+v", socketPath, err) } - return fmt.Errorf("failed to lstat the socket %s with error: %+v", socketPath, err) } + return nil +} +func DoesSocketExist(socketPath string) (bool, error) { // TODO: Until the bug - https://github.com/golang/go/issues/33357 is fixed, os.stat wouldn't return the // right mode(socket) on windows. Hence deleting the file, without checking whether // its a socket, on windows. - if err := os.Remove(socketPath); err != nil { - return fmt.Errorf("failed to remove stale socket %s with error: %+v", socketPath, err) + if _, err := os.Lstat(socketPath); err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, fmt.Errorf("failed to lstat the socket %s with error: %+v", socketPath, err) } - return nil + return true, nil }