Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix that node deletion controller didn't sync at startup #9190

Merged
merged 2 commits into from
Aug 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions kube-controllers/pkg/controllers/node/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,7 @@ const (
hepCreatedLabelValue = "calico-kube-controllers"
)

// retrySleepTime is a 100ms duration.
// NOTE(review): presumably the pause between retry attempts; its uses are
// outside this view — confirm against the callers.
var retrySleepTime = 100 * time.Millisecond

// NodeController implements the Controller interface. It is responsible for monitoring
// kubernetes nodes and responding to delete events by removing them from the Calico datastore.
Expand All @@ -67,7 +65,8 @@ func NewNodeController(ctx context.Context,
k8sClientset *kubernetes.Clientset,
calicoClient client.Interface,
cfg config.NodeControllerConfig,
nodeInformer, podInformer cache.SharedIndexInformer) controller.Controller {
nodeInformer, podInformer cache.SharedIndexInformer,
) controller.Controller {
nc := &NodeController{
ctx: ctx,
calicoClient: calicoClient,
Expand Down Expand Up @@ -102,7 +101,8 @@ func NewNodeController(ctx context.Context,
for _, f := range nodeDeletionFuncs {
f()
}
}}
},
}

// Create the Auto HostEndpoint sub-controller and register it to receive data.
// We always launch this controller, even if auto-HEPs are disabled, since the controller
Expand Down
31 changes: 26 additions & 5 deletions kube-controllers/pkg/controllers/node/node_deleter.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/util/workqueue"

bapi "github.com/projectcalico/calico/libcalico-go/lib/backend/api"
client "github.com/projectcalico/calico/libcalico-go/lib/clientv3"
cerrors "github.com/projectcalico/calico/libcalico-go/lib/errors"
"github.com/projectcalico/calico/libcalico-go/lib/options"
Expand All @@ -31,29 +32,49 @@ import (
// NewNodeDeletionController creates a new controller responsible for garbage collection Calico node objects
// in etcd mode when their corresponding Kubernetes node is deleted.
func NewNodeDeletionController(client client.Interface, cs *kubernetes.Clientset) *nodeDeleter {
return &nodeDeleter{
d := &nodeDeleter{
clientset: cs,
client: client,
rl: workqueue.DefaultControllerRateLimiter(),
syncChan: make(chan struct{}),
}
go d.run()
return d
}

// nodeDeleter checks for stale nodes whenever it is signalled: its run loop
// calls deleteStaleNodes once for every value received on syncChan.
type nodeDeleter struct {
// rl is the rate limiter; the constructor sets it to workqueue.DefaultControllerRateLimiter().
rl workqueue.RateLimiter
// clientset is the Kubernetes clientset passed to the constructor.
clientset *kubernetes.Clientset
// client is the Calico client passed to the constructor.
client client.Interface
// syncChan carries "please sync" signals to the run loop. The constructor
// creates it unbuffered, so a send blocks until the run loop receives it.
syncChan chan struct{}
}

// RegisterWith registers onStatusUpdate with the given DataFeed so this
// controller is told about sync status changes.
func (c *nodeDeleter) RegisterWith(f *DataFeed) {
// We use status updates to do a "start of day" sync. This controller doesn't
// actually use the syncer feed, but we do key off syncer updates at start of day to
// trigger an initial sync. This helps catch scenarios where nodes may have been deleted
// while the controller was not running / being rescheduled.
f.RegisterForSyncStatus(c.onStatusUpdate)
}

func (c *nodeDeleter) OnKubernetesNodeDeleted() {
// When a Kubernetes node is deleted, trigger a sync.
log.Debug("Kubernetes node deletion event")
err := c.deleteStaleNodes()
if err != nil {
log.WithError(err).Warn("Error deleting any stale nodes")
c.syncChan <- struct{}{}
}

// onStatusUpdate receives sync status changes. Every status other than
// bapi.InSync is ignored; on InSync it logs and signals the run loop to check
// for stale nodes.
func (c *nodeDeleter) onStatusUpdate(s bapi.SyncStatus) {
	if s != bapi.InSync {
		return
	}

	log.Info("Sync status is now in sync, checking for stale nodes")
	c.syncChan <- struct{}{}
}

// run is the deleter's worker loop. It waits for a signal on syncChan, calls
// deleteStaleNodes, and logs (without propagating) any error before waiting
// again. It returns only when syncChan is closed.
func (c *nodeDeleter) run() {
	for {
		if _, open := <-c.syncChan; !open {
			return
		}

		err := c.deleteStaleNodes()
		if err == nil {
			continue
		}
		log.WithError(err).Warn("Error deleting any stale nodes")
	}
}

Expand Down