NodeDrainer: Record pod deletion events on draining node (#152)
bastjan authored Feb 18, 2025
1 parent 9e51c6d commit b05850d
Showing 4 changed files with 23 additions and 6 deletions.
8 changes: 7 additions & 1 deletion controllers/node_force_drain_controller.go
@@ -29,6 +29,7 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/tools/record"
"k8s.io/utils/ptr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -50,6 +51,9 @@ type NodeForceDrainReconciler struct {
APIReader client.Reader
Scheme *runtime.Scheme

// Recorder is used to record pod deletion events on nodes.
Recorder record.EventRecorder

Clock Clock

// MaxReconcileIntervalDuringActiveDrain is the longest possible interval at which the controller reconciles during active node drains.
@@ -250,6 +254,7 @@ func (r *NodeForceDrainReconciler) forceDrainNode(ctx context.Context, node core
}
l.Info("Deleting pod", "pod", pod.Name, "podNamespace", pod.Namespace)
attemptedDeletion = true
r.Recorder.Eventf(&node, corev1.EventTypeWarning, "NodeDrainDeletePod", "Deleting pod %q in namespace %q", pod.Name, pod.Namespace)
if err := r.Delete(ctx, &pod); err != nil {
deletionErrs = append(deletionErrs, err)
}
@@ -287,7 +292,8 @@ func (r *NodeForceDrainReconciler) forceDeletePodsOnNode(ctx context.Context, no
continue
}

l.Info("Force deleting pod")
l.Info("Forcing pod termination")
r.Recorder.Eventf(&node, corev1.EventTypeWarning, "NodeDrainForcingPodTermination", "Forcing pod termination: Deleting pod %q in namespace %q with grace period of 1.", pod.Name, pod.Namespace)
if err := r.Delete(ctx, &pod, &client.DeleteOptions{
// As far as I was able to find, a grace period of 0 will leave the hanging pod on the node and block the reboot of the node.
// Therefore, we set a grace period of 1 second for the quickest possible deletion.
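The hunk above is truncated before the actual delete options. For illustration only, here is a minimal, self-contained sketch of a delete with a 1-second grace period through the controller-runtime client; the standalone function forceDeletePod and the package name are assumptions, not the commit's code.

// Illustrative sketch, not part of this commit: deleting a pod with a 1-second
// grace period. GracePeriodSeconds is the client.DeleteOptions field; ptr.To
// comes from k8s.io/utils/ptr, which the controller file already imports.
package controllers

import (
	"context"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/utils/ptr"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// forceDeletePod deletes a pod with the shortest non-zero grace period. A grace
// period of 0 would leave the hanging pod on the node, as the comment above notes.
func forceDeletePod(ctx context.Context, c client.Client, pod *corev1.Pod) error {
	return c.Delete(ctx, pod, &client.DeleteOptions{
		GracePeriodSeconds: ptr.To(int64(1)),
	})
}
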
8 changes: 8 additions & 0 deletions controllers/node_force_drain_controller_test.go
@@ -102,10 +102,12 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {

cli := controllerClient(t, node1, node2, drainingStorageNode, stuckPodOnDrainingStorageNode, forceDrain, podOnNode1, podToForceDeleteOnNode1)

recorder := newFakeRecorder()
subject := &NodeForceDrainReconciler{
Client: cli,
APIReader: cli,
Scheme: cli.Scheme(),
Recorder: recorder,

Clock: &clock,
}
@@ -166,6 +168,8 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {

var deletedPod corev1.Pod
require.Error(t, cli.Get(ctx, client.ObjectKeyFromObject(podOnNode1), &deletedPod), "pod should have been deleted")

requireEventMatches(t, recorder, `Warning NodeDrainDeletePod Deleting pod "pod-on-node1" in namespace "default"`)
})

t.Log("We can't control the clock used for DeletionTimestamp so there's a sequence break here. We will test force deletion at the end by skipping to real time.")
@@ -187,6 +191,8 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {

var deletedPod corev1.Pod
require.Error(t, cli.Get(ctx, client.ObjectKeyFromObject(podToForceDeleteOnNode1), &deletedPod), "pod should have been deleted")

requireEventMatches(t, recorder, `Warning NodeDrainForcingPodTermination Forcing pod termination: Deleting pod "pod-on-node1-force-delete" in namespace "default" with grace period of 1.`)
})

step(t, "nodes stop draining", func(t *testing.T) {
@@ -356,6 +362,7 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSetsStaticP
Client: cli,
APIReader: cli,
Scheme: cli.Scheme(),
Recorder: newFakeRecorder(),

Clock: &clock,
}
@@ -423,6 +430,7 @@ func Test_NodeForceDrainReconciler_Reconcile_MaxIntervalDuringActiveDrain(t *tes
Client: cli,
APIReader: cli,
Scheme: cli.Scheme(),
Recorder: newFakeRecorder(),

Clock: &clock,
}
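The assertions above call requireEventMatches, a test helper that is not part of this diff. Below is a hypothetical sketch of such a helper, assuming the test file's existing imports (testing, github.com/stretchr/testify/require, k8s.io/client-go/tools/record); record.FakeRecorder exposes recorded events as formatted strings ("<Type> <Reason> <message>") on its Events channel.

// Hypothetical sketch of requireEventMatches; the repository's real helper may differ.
func requireEventMatches(t *testing.T, recorder *record.FakeRecorder, expected string) {
	t.Helper()
	var seen []string
	for {
		select {
		case event := <-recorder.Events:
			if event == expected {
				return // found the expected event
			}
			seen = append(seen, event)
		default:
			// Channel drained without a match.
			require.Failf(t, "event not recorded", "expected %q, recorded %v", expected, seen)
			return
		}
	}
}
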
12 changes: 7 additions & 5 deletions controllers/upgradeconfig_controller_test.go
@@ -252,11 +252,10 @@ func Test_UpgradeConfigReconciler_Reconcile_AddNextWindowsToStatus(t *testing.T)

client := controllerClient(t, ucv, upgradeConfig)

recorder := record.NewFakeRecorder(5)
subject := &UpgradeConfigReconciler{
Client: client,
Scheme: client.Scheme(),
Recorder: recorder,
Recorder: newFakeRecorder(),

Clock: &clock,

@@ -345,7 +344,7 @@ func Test_UpgradeConfigReconciler_Reconcile_SuspendedByWindow(t *testing.T) {

client := controllerClient(t, ucv, upgradeConfig, suspensionWindow)

recorder := record.NewFakeRecorder(5)
recorder := newFakeRecorder()
subject := &UpgradeConfigReconciler{
Client: client,
Scheme: client.Scheme(),
@@ -486,11 +485,10 @@ func Test_UpgradeConfigReconciler_Reconcile_CleanupSuccessfulJobs(t *testing.T)
}
}

recorder := record.NewFakeRecorder(5)
subject := &UpgradeConfigReconciler{
Client: client,
Scheme: client.Scheme(),
Recorder: recorder,
Recorder: newFakeRecorder(),

Clock: &clock,

@@ -531,6 +529,10 @@ func listJobs(t *testing.T, c client.Client, namespace string) []managedupgradev
return jobs.Items
}

func newFakeRecorder() *record.FakeRecorder {
return record.NewFakeRecorder(100)
}

func reconcileNTimes(t *testing.T, subject reconcile.Reconciler, ctx context.Context, req reconcile.Request, n int) (lastResult reconcile.Result) {
t.Helper()
for i := 0; i < n; i++ {
1 change: 1 addition & 0 deletions main.go
@@ -179,6 +179,7 @@ func main() {
Client: mgr.GetClient(),
APIReader: mgr.GetAPIReader(),
Scheme: mgr.GetScheme(),
Recorder: mgr.GetEventRecorderFor("node-force-drain-controller"),

Clock: realClock{},

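Events recorded through mgr.GetEventRecorderFor end up as regular corev1.Event objects attributed to the drained node. As a usage note, the following sketch (not part of the commit) lists them with the standard client-go clientset; the function name, package layout, and kubeconfig handling are illustrative assumptions.

// Illustrative sketch: list the pod-deletion events this controller records on nodes.
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/tools/clientcmd"
)

// listNodeDrainEvents prints events whose reason matches the one introduced above.
func listNodeDrainEvents(ctx context.Context, kubeconfigPath string) error {
	cfg, err := clientcmd.BuildConfigFromFlags("", kubeconfigPath)
	if err != nil {
		return err
	}
	cs, err := kubernetes.NewForConfig(cfg)
	if err != nil {
		return err
	}
	events, err := cs.CoreV1().Events(metav1.NamespaceAll).List(ctx, metav1.ListOptions{
		FieldSelector: "involvedObject.kind=Node,reason=NodeDrainDeletePod",
	})
	if err != nil {
		return err
	}
	for _, e := range events.Items {
		fmt.Printf("%s\t%s\t%s\n", e.Type, e.Reason, e.Message)
	}
	return nil
}

func main() {
	if err := listNodeDrainEvents(context.Background(), clientcmd.RecommendedHomeFile); err != nil {
		panic(err)
	}
}
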
