diff --git a/cmd/vgmanager/vgmanager.go b/cmd/vgmanager/vgmanager.go index d8546bc93..a5fa85a18 100644 --- a/cmd/vgmanager/vgmanager.go +++ b/cmd/vgmanager/vgmanager.go @@ -18,6 +18,7 @@ package vgmanager import ( "context" + "errors" "fmt" "os" "os/signal" @@ -42,11 +43,11 @@ import ( "github.com/spf13/cobra" "github.com/topolvm/topolvm" "github.com/topolvm/topolvm/pkg/controller" - "github.com/topolvm/topolvm/pkg/driver" topoLVMD "github.com/topolvm/topolvm/pkg/lvmd" "github.com/topolvm/topolvm/pkg/runners" "google.golang.org/grpc" + "k8s.io/utils/ptr" "k8s.io/apimachinery/pkg/runtime" // Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.) @@ -70,6 +71,8 @@ const ( DefaultProbeAddr = ":8081" ) +var ErrConfigModified = errors.New("lvmd config file is modified") + type Options struct { Scheme *runtime.Scheme SetupLog logr.Logger @@ -101,7 +104,9 @@ func NewCmd(opts *Options) *cobra.Command { } func run(cmd *cobra.Command, _ []string, opts *Options) error { - ctx, cancel := context.WithCancel(cmd.Context()) + ctx, cancelWithCause := context.WithCancelCause(cmd.Context()) + defer cancelWithCause(nil) + ctx, cancel := signal.NotifyContext(ctx, os.Interrupt, syscall.SIGTERM) defer cancel() lvm := lvm.NewDefaultHostLVM() @@ -138,6 +143,7 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error { operatorNamespace: {}, }, }, + GracefulShutdownTimeout: ptr.To(time.Duration(-1)), }) if err != nil { return fmt.Errorf("unable to start manager: %w", err) @@ -203,7 +209,6 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error { NodeName: nodeName, Namespace: operatorNamespace, Filters: filter.DefaultFilters, - Shutdown: cancel, }).SetupWithManager(mgr); err != nil { return fmt.Errorf("unable to create controller VGManager: %w", err) } @@ -212,74 +217,79 @@ func run(cmd *cobra.Command, _ []string, opts *Options) error { return fmt.Errorf("unable to set up health check: %w", err) } - c := make(chan os.Signal, 2) - signal.Notify(c, []os.Signal{os.Interrupt, syscall.SIGTERM}...) - go func() { - <-c - cancel() - <-c - os.Exit(1) // second signal. Exit directly. - }() - // Create new watcher. watcher, err := fsnotify.NewWatcher() if err != nil { return fmt.Errorf("unable to set up file watcher: %w", err) } defer watcher.Close() - - // Start listening for events. - go func() { - fileNotExist := false - for { - // check if file exists, otherwise the watcher errors - _, err := os.Lstat(lvmd.DefaultFileConfigPath) - if err != nil { - if os.IsNotExist(err) { - time.Sleep(100 * time.Millisecond) - fileNotExist = true - } else { - opts.SetupLog.Error(err, "unable to check if lvmd config file exists") - } - } else { - // This handles the first time the file is created through the configmap - if fileNotExist { - cancel() - } - err = watcher.Add(lvmd.DefaultFileConfigPath) - if err != nil { - opts.SetupLog.Error(err, "unable to add file path to watcher") - } - break - } - } - for { - select { - case event, ok := <-watcher.Events: - if !ok { - return - } - if event.Has(fsnotify.Write) || event.Has(fsnotify.Remove) || event.Has(fsnotify.Chmod) { - opts.SetupLog.Info("lvmd config file is modified", "eventName", event.Name) - cancel() - } - case err, ok := <-watcher.Errors: - if !ok { - return - } - opts.SetupLog.Error(err, "file watcher error") - } - } - }() + // Start listening for events on TopoLVM files. + go watchTopoLVMAndNotify(opts, cancelWithCause, watcher) opts.SetupLog.Info("starting manager") if err := mgr.Start(ctx); err != nil { return fmt.Errorf("problem running manager: %w", err) } + if errors.Is(context.Cause(ctx), ErrConfigModified) { + opts.SetupLog.Info("restarting controller due to modified configuration") + return run(cmd, nil, opts) + } else if err := ctx.Err(); err != nil { + opts.SetupLog.Error(err, "exiting abnormally") + os.Exit(1) + } + return nil } +// watchTopoLVMAndNotify watches for changes to the lvmd config file and cancels the context if it changes. +// This is used to restart the manager when the lvmd config file changes. +// This is a blocking function and should be run in a goroutine. +// If the directory does not exist, it will be created to make it possible to watch for changes. +// If the watch determines that the lvmd config file has been modified, it will cancel the context with the ErrConfigModified error. +func watchTopoLVMAndNotify(opts *Options, cancelWithCause context.CancelCauseFunc, watcher *fsnotify.Watcher) { + // check if file exists, otherwise the watcher errors + fi, err := os.Stat(lvmd.DefaultFileConfigDir) + if err != nil { + if os.IsNotExist(err) { + if err := os.MkdirAll(lvmd.DefaultFileConfigDir, 0755); err != nil { + opts.SetupLog.Error(err, "unable to create lvmd config directory when it did not exist before") + } + } else { + opts.SetupLog.Error(err, "unable to check if lvmd config directory exists") + cancelWithCause(err) + } + } else if !fi.IsDir() { + opts.SetupLog.Error(err, "expected lvmd config directory is not a directory") + cancelWithCause(err) + } + + err = watcher.Add(lvmd.DefaultFileConfigDir) + if err != nil { + opts.SetupLog.Error(err, "unable to add file path to watcher") + } + for { + select { + case event, ok := <-watcher.Events: + if !ok { + return + } + if event.Name != lvmd.DefaultFileConfigPath { + continue + } + if event.Has(fsnotify.Write) || event.Has(fsnotify.Remove) || event.Has(fsnotify.Chmod) { + opts.SetupLog.Info("lvmd config file is modified", "eventName", event.Name) + cancelWithCause(fmt.Errorf("%w: %s", ErrConfigModified, event.Name)) + } + case err, ok := <-watcher.Errors: + if !ok { + return + } + opts.SetupLog.Error(err, "file watcher error") + } + } +} + func loadConfFile(ctx context.Context, config *lvmd.Config, cfgFilePath string) error { b, err := os.ReadFile(cfgFilePath) if err != nil { diff --git a/go.mod b/go.mod index e078079d7..462e63959 100644 --- a/go.mod +++ b/go.mod @@ -45,7 +45,7 @@ require ( sigs.k8s.io/yaml v1.4.0 ) -replace github.com/topolvm/topolvm => github.com/openshift/topolvm v0.15.3-0.20240314121823-1339f4f8b9ae +replace github.com/topolvm/topolvm => github.com/openshift/topolvm v0.15.3-0.20240321104545-ab31b05c1b85 require ( github.com/Masterminds/goutils v1.1.1 // indirect diff --git a/go.sum b/go.sum index 63e42036c..f46dbd740 100644 --- a/go.sum +++ b/go.sum @@ -155,8 +155,8 @@ github.com/openshift/client-go v0.0.0-20240312121557-60dd5f9fbf8d h1:vdrC3QYkFcs github.com/openshift/client-go v0.0.0-20240312121557-60dd5f9fbf8d/go.mod h1:Y5Hp789dTrF6Fq8cA5YQlpwffmlLy8mc2un/CY0cg7Q= github.com/openshift/library-go v0.0.0-20240312152318-4109a9e7a437 h1:xMflL80gT2cXxnmDkR8QLZCbfh/x38jV5XOfLiNlsLE= github.com/openshift/library-go v0.0.0-20240312152318-4109a9e7a437/go.mod h1:ePlaOqUiPplRc++6aYdMe+2FmXb2xTNS9Nz5laG2YmI= -github.com/openshift/topolvm v0.15.3-0.20240314121823-1339f4f8b9ae h1:j6Y1iy4+Ml7QM3psiAeUtLUY+mFHfdi7xolnNtwz8fI= -github.com/openshift/topolvm v0.15.3-0.20240314121823-1339f4f8b9ae/go.mod h1:BfEw9thAo64c4xYejdbGYPOHPWhAKIaGwq72gP2Lqkc= +github.com/openshift/topolvm v0.15.3-0.20240321104545-ab31b05c1b85 h1:LdqfViDjjcIIgidKm+7lIRlpA49XZh9Qfjym4ab4CH8= +github.com/openshift/topolvm v0.15.3-0.20240321104545-ab31b05c1b85/go.mod h1:sp/P6O6+7is1dnV6am+yk+djRxxfcOPyzW4Bi2CfEyU= github.com/operator-framework/api v0.22.0 h1:UZSn+iaQih4rCReezOnWTTJkMyawwV5iLnIItaOzytY= github.com/operator-framework/api v0.22.0/go.mod h1:p/7YDbr+n4fmESfZ47yLAV1SvkfE6NU2aX8KhcfI0GA= github.com/pelletier/go-toml v1.9.5 h1:4yBQzkHv+7BHq2PQUZF3Mx0IYxG7LsP222s7Agd3ve8= diff --git a/internal/controllers/vgmanager/controller.go b/internal/controllers/vgmanager/controller.go index 3bfeeb032..a57d35aba 100644 --- a/internal/controllers/vgmanager/controller.go +++ b/internal/controllers/vgmanager/controller.go @@ -95,7 +95,6 @@ type Reconciler struct { NodeName string Namespace string Filters func(*lvmv1alpha1.LVMVolumeGroup) filter.Filters - Shutdown context.CancelFunc } func (r *Reconciler) getFinalizer() string { @@ -392,11 +391,9 @@ func (r *Reconciler) updateLVMDConfigAfterReconcile( if err := r.LVMD.Save(ctx, newCFG); err != nil { return fmt.Errorf("failed to update lvmd config file to update volume group %s: %w", volumeGroup.GetName(), err) } - msg := "updated lvmd config with new deviceClasses, now restarting.." + msg := "updated lvmd config with new deviceClasses" logger.Info(msg) r.NormalEvent(ctx, volumeGroup, EventReasonLVMDConfigUpdated, msg) - - r.Shutdown() } return nil } diff --git a/internal/controllers/vgmanager/controller_test.go b/internal/controllers/vgmanager/controller_test.go index 9a475cc99..ef027c3ab 100644 --- a/internal/controllers/vgmanager/controller_test.go +++ b/internal/controllers/vgmanager/controller_test.go @@ -302,14 +302,8 @@ func testMockedBlockDeviceOnHost(ctx context.Context) { }) By("triggering the next reconciliation after the creation of the thin pool", func() { - cancelled := false - cancelable := func() { - cancelled = true - } - instances.Reconciler.Shutdown = cancelable _, err := instances.Reconciler.Reconcile(ctx, reconcile.Request{NamespacedName: client.ObjectKeyFromObject(vg)}) Expect(err).ToNot(HaveOccurred()) - Eventually(ctx, func() bool { return cancelled }).Should(BeTrue()) }) By("verifying the lvmd config generation", func() { diff --git a/internal/controllers/vgmanager/lvmd/lvmd.go b/internal/controllers/vgmanager/lvmd/lvmd.go index 65937ba31..0a32616b9 100644 --- a/internal/controllers/vgmanager/lvmd/lvmd.go +++ b/internal/controllers/vgmanager/lvmd/lvmd.go @@ -21,7 +21,8 @@ type ThinPoolConfig = lvmd.ThinPoolConfig var TypeThin = lvmd.TypeThin const ( - DefaultFileConfigPath = "/etc/topolvm/lvmd.yaml" + DefaultFileConfigDir = "/etc/topolvm" + DefaultFileConfigPath = DefaultFileConfigDir + "/lvmd.yaml" maxReadLength = 2 * 1 << 20 // 2MB ) diff --git a/vendor/github.com/topolvm/topolvm/internal/runners/metrics_exporter.go b/vendor/github.com/topolvm/topolvm/internal/runners/metrics_exporter.go index 4107abc93..a21903283 100644 --- a/vendor/github.com/topolvm/topolvm/internal/runners/metrics_exporter.go +++ b/vendor/github.com/topolvm/topolvm/internal/runners/metrics_exporter.go @@ -63,7 +63,6 @@ var _ manager.LeaderElectionRunnable = &metricsExporter{} // NewMetricsExporter creates controller-runtime's manager.Runnable to run // a metrics exporter for a node. func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Client, nodeName string) manager.Runnable { - // metrics available under volumegroup subsystem availableBytes := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: metricsNamespace, @@ -72,7 +71,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG available bytes under lvmd management", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(availableBytes) sizeBytes := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: metricsNamespace, @@ -81,7 +79,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG size bytes under lvmd management", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(sizeBytes) // metrics available under thinpool subsystem tpSizeBytes := prometheus.NewGaugeVec(prometheus.GaugeOpts{ @@ -91,7 +88,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG Thin Pool raw size bytes", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(tpSizeBytes) dataPercent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: metricsNamespace, @@ -100,7 +96,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG Thin Pool data usage percent", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(dataPercent) metadataPercent := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: metricsNamespace, @@ -109,7 +104,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG Thin Pool metadata usage percent", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(metadataPercent) opAvailableBytes := prometheus.NewGaugeVec(prometheus.GaugeOpts{ Namespace: metricsNamespace, @@ -118,7 +112,6 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli Help: "LVM VG Thin Pool bytes available with overprovisioning", ConstLabels: prometheus.Labels{"node": nodeName}, }, []string{"device_class"}) - metrics.Registry.MustRegister(opAvailableBytes) return &metricsExporter{ client: client, @@ -135,13 +128,44 @@ func NewMetricsExporter(vgServiceClient proto.VGServiceClient, client client.Cli } } +func (m *metricsExporter) getCollectors() []prometheus.Collector { + return []prometheus.Collector{ + m.availableBytes, + m.sizeBytes, + m.thinPool.tpSizeBytes, + m.thinPool.dataPercent, + m.thinPool.metadataPercent, + m.thinPool.opAvailableBytes, + } +} + +func (m *metricsExporter) registerAll() error { + for _, c := range m.getCollectors() { + if err := metrics.Registry.Register(c); err != nil { + return err + } + } + return nil +} + +func (m *metricsExporter) unregisterAll() { + for _, c := range m.getCollectors() { + metrics.Registry.Unregister(c) + } +} + // Start implements controller-runtime's manager.Runnable. func (m *metricsExporter) Start(ctx context.Context) error { + if err := m.registerAll(); err != nil { + return err + } + metricsCh := make(chan NodeMetrics) go func() { for { select { case <-ctx.Done(): + m.unregisterAll() return case met := <-metricsCh: diff --git a/vendor/modules.txt b/vendor/modules.txt index 52b20c352..8f5ecf37c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -374,7 +374,7 @@ github.com/stretchr/objx ## explicit; go 1.17 github.com/stretchr/testify/assert github.com/stretchr/testify/mock -# github.com/topolvm/topolvm v0.27.1-0.20240306010159-8c6c91dc8b7f => github.com/openshift/topolvm v0.15.3-0.20240314121823-1339f4f8b9ae +# github.com/topolvm/topolvm v0.27.1-0.20240306010159-8c6c91dc8b7f => github.com/openshift/topolvm v0.15.3-0.20240321104545-ab31b05c1b85 ## explicit; go 1.20 github.com/topolvm/topolvm github.com/topolvm/topolvm/api/legacy/v1 @@ -1287,4 +1287,4 @@ sigs.k8s.io/structured-merge-diff/v4/value ## explicit; go 1.12 sigs.k8s.io/yaml sigs.k8s.io/yaml/goyaml.v2 -# github.com/topolvm/topolvm => github.com/openshift/topolvm v0.15.3-0.20240314121823-1339f4f8b9ae +# github.com/topolvm/topolvm => github.com/openshift/topolvm v0.15.3-0.20240321104545-ab31b05c1b85