From 9bbaf278312cdf533430b1c5620cad1742f5a395 Mon Sep 17 00:00:00 2001 From: Alvar Penning Date: Fri, 14 Oct 2022 11:06:33 +0200 Subject: [PATCH] Prometheus metric for repository's last update A new gauge metric `rest_server_repo_last_update_timestamp` was added to monitor each repository's last write access. This allows a basic monitoring for each repository's freshness. In order to have this metric available at startup, a basic preloading for Prometheus metrics has been implemented. This operates by scanning the file system for restic repositories and using their last modified time. Subsequently, each write access updates the last update time. If scanning each repository takes too long, it can be disabled through the `--prometheus-no-preload` flag. This might be related to the feature request in #176. --- README.md | 37 +++++----- changelog/unreleased/pull-197 | 17 +++++ cmd/rest-server/main.go | 7 ++ handlers.go | 130 +++++++++++++++++++++++++++++----- metrics.go | 26 +++++-- repo/repo.go | 17 +++++ 6 files changed, 194 insertions(+), 40 deletions(-) create mode 100644 changelog/unreleased/pull-197 diff --git a/README.md b/README.md index 93eeb04b..fbabd9f3 100644 --- a/README.md +++ b/README.md @@ -32,24 +32,25 @@ Usage: rest-server [flags] Flags: - --append-only enable append only mode - --cpu-profile string write CPU profile to file - --debug output debug messages - -h, --help help for rest-server - --htpasswd-file string location of .htpasswd file (default: "/.htpasswd") - --listen string listen address (default ":8000") - --log filename write HTTP requests in the combined log format to the specified filename - --max-size int the maximum size of the repository in bytes - --no-auth disable .htpasswd authentication - --no-verify-upload do not verify the integrity of uploaded data. 
DO NOT enable unless the rest-server runs on a very low-power device - --path string data directory (default "/tmp/restic") - --private-repos users can only access their private repo - --prometheus enable Prometheus metrics - --prometheus-no-auth disable auth for Prometheus /metrics endpoint - --tls turn on TLS support - --tls-cert string TLS certificate path - --tls-key string TLS key path - -v, --version version for rest-server + --append-only enable append only mode + --cpu-profile string write CPU profile to file + --debug output debug messages + -h, --help help for rest-server + --htpasswd-file string location of .htpasswd file (default: "/.htpasswd") + --listen string listen address (default ":8000") + --log filename write HTTP requests in the combined log format to the specified filename + --max-size int the maximum size of the repository in bytes + --no-auth disable .htpasswd authentication + --no-verify-upload do not verify the integrity of uploaded data. DO NOT enable unless the rest-server runs on a very low-power device + --path string data directory (default "/tmp/restic") + --private-repos users can only access their private repo + --prometheus enable Prometheus metrics + --prometheus-no-auth disable auth for Prometheus /metrics endpoint + --prometheus-no-preload disable preloading Prometheus metrics during startup + --tls turn on TLS support + --tls-cert string TLS certificate path + --tls-key string TLS key path + -v, --version version for rest-server ``` By default the server persists backup data in the OS temporary directory (`/tmp/restic` on Linux/BSD and others, in `%TEMP%\\restic` in Windows, etc). **If `rest-server` is launched using the default path, all backups will be lost**. 
To start the server with a custom persistence directory and with authentication disabled: diff --git a/changelog/unreleased/pull-197 b/changelog/unreleased/pull-197 new file mode 100644 index 00000000..1bf8975c --- /dev/null +++ b/changelog/unreleased/pull-197 @@ -0,0 +1,17 @@ +Enhancement: Prometheus metric for repository's last update + +A new gauge metric `rest_server_repo_last_update_timestamp` was added to +monitor each repository's last write access. This allows basic +monitoring of each repository's freshness. + +This metric can be used in an alerting rule. For example, to be +notified if a repository has not been updated for two days: +> time() - rest_server_repo_last_update_timestamp >= 172800 + +In order to have this metric available at startup, basic preloading of +Prometheus metrics has been implemented. This operates by scanning the file +system for restic repositories and using their last modified time. +Subsequently, each write access updates the last update time. + +If scanning each repository takes too long, preloading can be disabled through the +`--prometheus-no-preload` flag. 
diff --git a/cmd/rest-server/main.go b/cmd/rest-server/main.go index 8ed850b6..b5120413 100644 --- a/cmd/rest-server/main.go +++ b/cmd/rest-server/main.go @@ -54,6 +54,7 @@ func init() { flags.BoolVar(&server.PrivateRepos, "private-repos", server.PrivateRepos, "users can only access their private repo") flags.BoolVar(&server.Prometheus, "prometheus", server.Prometheus, "enable Prometheus metrics") flags.BoolVar(&server.PrometheusNoAuth, "prometheus-no-auth", server.PrometheusNoAuth, "disable auth for Prometheus /metrics endpoint") + flags.BoolVar(&server.PrometheusNoPreload, "prometheus-no-preload", server.PrometheusNoPreload, "disable preloading Prometheus metrics during startup") } var version = "0.11.0" @@ -126,6 +127,12 @@ func runRoot(cmd *cobra.Command, args []string) error { log.Println("Private repositories disabled") } + if server.Prometheus && !server.PrometheusNoPreload { + if err := server.PreloadMetrics(); err != nil { + return fmt.Errorf("unable to preload metrics: %w", err) + } + } + enabledTLS, privateKey, publicKey, err := tlsSettings() if err != nil { return err diff --git a/handlers.go b/handlers.go index f1f3954c..75c5ef17 100644 --- a/handlers.go +++ b/handlers.go @@ -2,8 +2,11 @@ package restserver import ( "errors" + "fmt" + "io/fs" "log" "net/http" + "os" "path" "path/filepath" "strings" @@ -14,23 +17,24 @@ import ( // Server encapsulates the rest-server's settings and repo management logic type Server struct { - Path string - HtpasswdPath string - Listen string - Log string - CPUProfile string - TLSKey string - TLSCert string - TLS bool - NoAuth bool - AppendOnly bool - PrivateRepos bool - Prometheus bool - PrometheusNoAuth bool - Debug bool - MaxRepoSize int64 - PanicOnError bool - NoVerifyUpload bool + Path string + HtpasswdPath string + Listen string + Log string + CPUProfile string + TLSKey string + TLSCert string + TLS bool + NoAuth bool + AppendOnly bool + PrivateRepos bool + Prometheus bool + PrometheusNoAuth bool + 
PrometheusNoPreload bool + Debug bool + MaxRepoSize int64 + PanicOnError bool + NoVerifyUpload bool htpasswdFile *HtpasswdFile quotaManager *quota.Manager @@ -46,6 +50,98 @@ func httpDefaultError(w http.ResponseWriter, code int) { http.Error(w, http.StatusText(code), code) } +// PreloadMetrics for Prometheus for each available repository. +func (s *Server) PreloadMetrics() error { + // No need to preload metrics if those are disabled. + if !s.Prometheus || s.PrometheusNoPreload { + return nil + } + + if _, statErr := os.Lstat(s.Path); errors.Is(statErr, os.ErrNotExist) { + log.Print("PreloadMetrics: skipping preloading as repo does not exists yet") + return nil + } + + var repoPaths []string + + walkFunc := func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if !d.IsDir() { + return nil + } + + // Verify that we're in an allowed directory. + for _, objectType := range repo.ObjectTypes { + if d.Name() == objectType { + return filepath.SkipDir + } + } + + // Verify that we're also a valid repository. + for _, objectType := range repo.ObjectTypes { + stat, statErr := os.Lstat(filepath.Join(path, objectType)) + if errors.Is(statErr, os.ErrNotExist) || !stat.IsDir() { + if s.Debug { + log.Printf("PreloadMetrics: %s misses directory %s; skip", path, objectType) + } + return nil + } + } + for _, fileType := range repo.FileTypes { + stat, statErr := os.Lstat(filepath.Join(path, fileType)) + if errors.Is(statErr, os.ErrNotExist) || !stat.Mode().IsRegular() { + if s.Debug { + log.Printf("PreloadMetrics: %s misses file %s; skip", path, fileType) + } + return nil + } + } + + if s.Debug { + log.Printf("PreloadMetrics: found repository %s", path) + } + repoPaths = append(repoPaths, path) + return nil + } + + if err := filepath.WalkDir(s.Path, walkFunc); err != nil { + return err + } + + for _, repoPath := range repoPaths { + // Remove leading path prefix. 
+ relPath := repoPath[len(s.Path):] + if strings.HasPrefix(relPath, string(os.PathSeparator)) { + relPath = relPath[1:] + } + folderPath := strings.Split(relPath, string(os.PathSeparator)) + + if !folderPathValid(folderPath) { + return fmt.Errorf("invalid foder path %s for preloading", + strings.Join(folderPath, string(os.PathSeparator))) + } + + opt := repo.Options{ + Debug: s.Debug, + PanicOnError: s.PanicOnError, + BlobMetricFunc: makeBlobMetricFunc("", folderPath), + } + + handler, err := repo.New(repoPath, opt) + if err != nil { + return err + } + + if err := handler.PreloadMetrics(); err != nil { + return err + } + } + return nil +} + // ServeHTTP makes this server an http.Handler. It handlers the administrative // part of the request (figuring out the filesystem location, performing // authentication, etc) and then passes it on to repo.Handler for actual diff --git a/metrics.go b/metrics.go index 2419a60b..cea892fa 100644 --- a/metrics.go +++ b/metrics.go @@ -2,6 +2,7 @@ package restserver import ( "strings" + "time" "github.com/prometheus/client_golang/prometheus" "github.com/restic/rest-server/repo" @@ -57,25 +58,39 @@ var metricBlobDeleteBytesTotal = prometheus.NewCounterVec( metricLabelList, ) +var metricRepoLastUpdateTimestamp = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "rest_server_repo_last_update_timestamp", + Help: "Unix timestamp of repository's last write update", + }, + []string{"repo"}, +) + // makeBlobMetricFunc creates a metrics callback function that increments the // Prometheus metrics. 
func makeBlobMetricFunc(username string, folderPath []string) repo.BlobMetricFunc { - var f repo.BlobMetricFunc = func(objectType string, operation repo.BlobOperation, nBytes uint64) { + var f repo.BlobMetricFunc = func(objectType string, operation repo.BlobOperation, payload uint64) { + repoPath := strings.Join(folderPath, "/") labels := prometheus.Labels{ "user": username, - "repo": strings.Join(folderPath, "/"), + "repo": repoPath, "type": objectType, } + switch operation { case repo.BlobRead: metricBlobReadTotal.With(labels).Inc() - metricBlobReadBytesTotal.With(labels).Add(float64(nBytes)) + metricBlobReadBytesTotal.With(labels).Add(float64(payload)) case repo.BlobWrite: metricBlobWriteTotal.With(labels).Inc() - metricBlobWriteBytesTotal.With(labels).Add(float64(nBytes)) + metricBlobWriteBytesTotal.With(labels).Add(float64(payload)) + metricRepoLastUpdateTimestamp.WithLabelValues(repoPath).Set( + float64(time.Now().UnixMilli()) / 1000.0) case repo.BlobDelete: metricBlobDeleteTotal.With(labels).Inc() - metricBlobDeleteBytesTotal.With(labels).Add(float64(nBytes)) + metricBlobDeleteBytesTotal.With(labels).Add(float64(payload)) + case repo.RepoPreloadLastUpdate: + metricRepoLastUpdateTimestamp.WithLabelValues(repoPath).Set(float64(payload) / 1000.0) } } return f @@ -89,4 +104,5 @@ func init() { prometheus.MustRegister(metricBlobReadBytesTotal) prometheus.MustRegister(metricBlobDeleteTotal) prometheus.MustRegister(metricBlobDeleteBytesTotal) + prometheus.MustRegister(metricRepoLastUpdateTimestamp) } diff --git a/repo/repo.go b/repo/repo.go index e59cc376..c236fe07 100644 --- a/repo/repo.go +++ b/repo/repo.go @@ -113,6 +113,8 @@ const ( BlobRead = 'R' // A blob has been read BlobWrite = 'W' // A blob has been written BlobDelete = 'D' // A blob has been deleted + + RepoPreloadLastUpdate = 'U' // Set last update timestamp for preloading ) // BlobMetricFunc is the callback signature for blob metrics. 
Such a callback @@ -123,6 +125,21 @@ const ( // TODO: Perhaps add http.Request for the username so that this can be cached? type BlobMetricFunc func(objectType string, operation BlobOperation, nBytes uint64) +// PreloadMetrics for Prometheus. +func (h *Handler) PreloadMetrics() error { + if h.opt.Debug { + log.Printf("%v.PreloadMetrics()", h) + } + + stat, err := os.Lstat(h.getSubPath("snapshots")) + if err != nil { + return err + } + h.sendMetric("", RepoPreloadLastUpdate, uint64(stat.ModTime().UnixMilli())) + + return nil +} + // ServeHTTP performs strict matching on the repo part of the URL path and // dispatches the request to the appropriate handler. func (h *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {