From b5ce29dd4c898cf9b19123a91e6aec21b8f308cc Mon Sep 17 00:00:00 2001 From: Casey Davenport Date: Wed, 28 Aug 2024 14:57:36 -0700 Subject: [PATCH 1/2] Fix that node deletion controller didn't sync at startup --- .../pkg/controllers/node/controller.go | 10 +++--- .../pkg/controllers/node/node_deleter.go | 34 ++++++++++++++++--- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/kube-controllers/pkg/controllers/node/controller.go b/kube-controllers/pkg/controllers/node/controller.go index 68170c2c991..a1135dacd3c 100644 --- a/kube-controllers/pkg/controllers/node/controller.go +++ b/kube-controllers/pkg/controllers/node/controller.go @@ -40,9 +40,7 @@ const ( hepCreatedLabelValue = "calico-kube-controllers" ) -var ( - retrySleepTime = 100 * time.Millisecond -) +var retrySleepTime = 100 * time.Millisecond // NodeController implements the Controller interface. It is responsible for monitoring // kubernetes nodes and responding to delete events by removing them from the Calico datastore. @@ -67,7 +65,8 @@ func NewNodeController(ctx context.Context, k8sClientset *kubernetes.Clientset, calicoClient client.Interface, cfg config.NodeControllerConfig, - nodeInformer, podInformer cache.SharedIndexInformer) controller.Controller { + nodeInformer, podInformer cache.SharedIndexInformer, +) controller.Controller { nc := &NodeController{ ctx: ctx, calicoClient: calicoClient, @@ -102,7 +101,8 @@ func NewNodeController(ctx context.Context, for _, f := range nodeDeletionFuncs { f() } - }} + }, + } // Create the Auto HostEndpoint sub-controller and register it to receive data. 
// We always launch this controller, even if auto-HEPs are disabled, since the controller diff --git a/kube-controllers/pkg/controllers/node/node_deleter.go b/kube-controllers/pkg/controllers/node/node_deleter.go index a89e0945d94..3b37d7f1c83 100644 --- a/kube-controllers/pkg/controllers/node/node_deleter.go +++ b/kube-controllers/pkg/controllers/node/node_deleter.go @@ -23,6 +23,7 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/client-go/util/workqueue" + bapi "github.com/projectcalico/calico/libcalico-go/lib/backend/api" client "github.com/projectcalico/calico/libcalico-go/lib/clientv3" cerrors "github.com/projectcalico/calico/libcalico-go/lib/errors" "github.com/projectcalico/calico/libcalico-go/lib/options" @@ -31,29 +32,52 @@ import ( // NewNodeDeletionController creates a new controller responsible for garbage collection Calico node objects // in etcd mode when their corresponding Kubernetes node is deleted. func NewNodeDeletionController(client client.Interface, cs *kubernetes.Clientset) *nodeDeleter { - return &nodeDeleter{ + d := &nodeDeleter{ clientset: cs, client: client, rl: workqueue.DefaultControllerRateLimiter(), + syncChan: make(chan struct{}), } + go d.run() + return d } type nodeDeleter struct { rl workqueue.RateLimiter clientset *kubernetes.Clientset client client.Interface + syncChan chan struct{} } func (c *nodeDeleter) RegisterWith(f *DataFeed) { - // No-op - we only care about Kubernetes node deletion events. + // We use status updates to do a "start of day" sync. This controller doesn't + // actually use the syncer feed, but we do key off syncer updates at start of day to + // trigger an initial sync. This helps catch scenarios where nodes may have been deleted + // while the controller was not running / being rescheduled. + f.RegisterForSyncStatus(c.onStatusUpdate) } func (c *nodeDeleter) OnKubernetesNodeDeleted() { // When a Kubernetes node is deleted, trigger a sync. 
log.Debug("Kubernetes node deletion event") - err := c.deleteStaleNodes() - if err != nil { - log.WithError(err).Warn("Error deleting any stale nodes") + c.syncChan <- struct{}{} +} + +func (c *nodeDeleter) onStatusUpdate(s bapi.SyncStatus) { + if s == bapi.InSync { + log.Info("Sync status is now in sync, checking for stale nodes") + c.syncChan <- struct{}{} + } +} + +func (c *nodeDeleter) run() { + for { + select { + case <-c.syncChan: + if err := c.deleteStaleNodes(); err != nil { + log.WithError(err).Warn("Error deleting any stale nodes") + } + } } } From 0ab31c3cf95818950baf620064266ede7cfd760b Mon Sep 17 00:00:00 2001 From: Casey Davenport Date: Wed, 28 Aug 2024 15:44:22 -0700 Subject: [PATCH 2/2] Fix static checks --- kube-controllers/pkg/controllers/node/node_deleter.go | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/kube-controllers/pkg/controllers/node/node_deleter.go b/kube-controllers/pkg/controllers/node/node_deleter.go index 3b37d7f1c83..14cf934281e 100644 --- a/kube-controllers/pkg/controllers/node/node_deleter.go +++ b/kube-controllers/pkg/controllers/node/node_deleter.go @@ -71,12 +71,9 @@ func (c *nodeDeleter) onStatusUpdate(s bapi.SyncStatus) { } func (c *nodeDeleter) run() { - for { - select { - case <-c.syncChan: - if err := c.deleteStaleNodes(); err != nil { - log.WithError(err).Warn("Error deleting any stale nodes") - } + for range c.syncChan { + if err := c.deleteStaleNodes(); err != nil { + log.WithError(err).Warn("Error deleting any stale nodes") } } }