diff --git a/controllers/node_force_drain_controller.go b/controllers/node_force_drain_controller.go
index ae65b59..2f37a1e 100644
--- a/controllers/node_force_drain_controller.go
+++ b/controllers/node_force_drain_controller.go
@@ -29,6 +29,7 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/tools/record"
 	"k8s.io/utils/ptr"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -50,6 +51,9 @@ type NodeForceDrainReconciler struct {
 	APIReader client.Reader
 	Scheme    *runtime.Scheme
 
+	// Recorder is used to record pod deletion events on nodes.
+	Recorder record.EventRecorder
+
 	Clock Clock
 
 	// MaxReconcileIntervalDuringActiveDrain is the longest possible interval at which the controller reconciles during active node drains.
@@ -250,6 +254,7 @@ func (r *NodeForceDrainReconciler) forceDrainNode(ctx context.Context, node core
 		}
 		l.Info("Deleting pod", "pod", pod.Name, "podNamespace", pod.Namespace)
 		attemptedDeletion = true
+		r.Recorder.Eventf(&node, corev1.EventTypeWarning, "NodeDrainDeletePod", "Deleting pod %q in namespace %q", pod.Name, pod.Namespace)
 		if err := r.Delete(ctx, &pod); err != nil {
 			deletionErrs = append(deletionErrs, err)
 		}
@@ -287,7 +292,8 @@ func (r *NodeForceDrainReconciler) forceDeletePodsOnNode(ctx context.Context, no
 			continue
 		}
 
-		l.Info("Force deleting pod")
+		l.Info("Forcing pod termination")
+		r.Recorder.Eventf(&node, corev1.EventTypeWarning, "NodeDrainForcingPodTermination", "Forcing pod termination: Deleting pod %q in namespace %q with grace period of 1.", pod.Name, pod.Namespace)
 		if err := r.Delete(ctx, &pod, &client.DeleteOptions{
 			// As far is I was able to find a grace period of 0 will leave the hanging pod on the node and block the reboot of the node.
 			// Therefore we set a grace period of 1 second for the quickest possible deletion.
diff --git a/controllers/node_force_drain_controller_test.go b/controllers/node_force_drain_controller_test.go
index 9765eb7..0fa21f9 100644
--- a/controllers/node_force_drain_controller_test.go
+++ b/controllers/node_force_drain_controller_test.go
@@ -102,10 +102,12 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {
 
 	cli := controllerClient(t, node1, node2, drainingStorageNode, stuckPodOnDrainingStorageNode, forceDrain, podOnNode1, podToForceDeleteOnNode1)
 
+	recorder := newFakeRecorder()
 	subject := &NodeForceDrainReconciler{
 		Client:    cli,
 		APIReader: cli,
 		Scheme:    cli.Scheme(),
+		Recorder:  recorder,
 		Clock:     &clock,
 	}
 
@@ -166,6 +168,8 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {
 
 		var deletedPod corev1.Pod
 		require.Error(t, cli.Get(ctx, client.ObjectKeyFromObject(podOnNode1), &deletedPod), "pod should have been deleted")
+
+		requireEventMatches(t, recorder, `Warning NodeDrainDeletePod Deleting pod "pod-on-node1" in namespace "default"`)
 	})
 
 	t.Log("We can't control the clock used for DeletionTimestamp so there's a sequence break here. We will test force deletion at the end by skipping to real time.")
@@ -187,6 +191,8 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {
 
 		var deletedPod corev1.Pod
 		require.Error(t, cli.Get(ctx, client.ObjectKeyFromObject(podToForceDeleteOnNode1), &deletedPod), "pod should have been deleted")
+
+		requireEventMatches(t, recorder, `Warning NodeDrainForcingPodTermination Forcing pod termination: Deleting pod "pod-on-node1-force-delete" in namespace "default" with grace period of 1.`)
 	})
 
 	step(t, "nodes stop draining", func(t *testing.T) {
@@ -356,6 +362,7 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSetsStaticP
 		Client:    cli,
 		APIReader: cli,
 		Scheme:    cli.Scheme(),
+		Recorder:  newFakeRecorder(),
 		Clock:     &clock,
 	}
 
@@ -423,6 +430,7 @@ func Test_NodeForceDrainReconciler_Reconcile_MaxIntervalDuringActiveDrain(t *tes
 		Client:    cli,
 		APIReader: cli,
 		Scheme:    cli.Scheme(),
+		Recorder:  newFakeRecorder(),
 		Clock:     &clock,
 	}
 
diff --git a/controllers/upgradeconfig_controller_test.go b/controllers/upgradeconfig_controller_test.go
index e622686..77e9e0e 100644
--- a/controllers/upgradeconfig_controller_test.go
+++ b/controllers/upgradeconfig_controller_test.go
@@ -252,11 +252,10 @@ func Test_UpgradeConfigReconciler_Reconcile_AddNextWindowsToStatus(t *testing.T)
 
 	client := controllerClient(t, ucv, upgradeConfig)
 
-	recorder := record.NewFakeRecorder(5)
 	subject := &UpgradeConfigReconciler{
 		Client:   client,
 		Scheme:   client.Scheme(),
-		Recorder: recorder,
+		Recorder: newFakeRecorder(),
 
 		Clock: &clock,
 
@@ -345,7 +344,7 @@ func Test_UpgradeConfigReconciler_Reconcile_SuspendedByWindow(t *testing.T) {
 
 	client := controllerClient(t, ucv, upgradeConfig, suspensionWindow)
 
-	recorder := record.NewFakeRecorder(5)
+	recorder := newFakeRecorder()
 	subject := &UpgradeConfigReconciler{
 		Client:   client,
 		Scheme:   client.Scheme(),
@@ -486,11 +485,10 @@ func Test_UpgradeConfigReconciler_Reconcile_CleanupSuccessfulJobs(t *testing.T)
 		}
 	}
 
-	recorder := record.NewFakeRecorder(5)
 	subject := &UpgradeConfigReconciler{
 		Client:   client,
 		Scheme:   client.Scheme(),
-		Recorder: recorder,
+		Recorder: newFakeRecorder(),
 
 		Clock: &clock,
 
@@ -531,6 +529,10 @@ func listJobs(t *testing.T, c client.Client, namespace string) []managedupgradev
 	return jobs.Items
 }
 
+func newFakeRecorder() *record.FakeRecorder {
+	return record.NewFakeRecorder(100)
+}
+
 func reconcileNTimes(t *testing.T, subject reconcile.Reconciler, ctx context.Context, req reconcile.Request, n int) (lastResult reconcile.Result) {
 	t.Helper()
 	for i := 0; i < n; i++ {
diff --git a/main.go b/main.go
index 5db057d..5cead0f 100644
--- a/main.go
+++ b/main.go
@@ -179,6 +179,7 @@ func main() {
 		Client:    mgr.GetClient(),
 		APIReader: mgr.GetAPIReader(),
 		Scheme:    mgr.GetScheme(),
+		Recorder:  mgr.GetEventRecorderFor("node-force-drain-controller"),
 		Clock:     realClock{},