Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix race condition #52

Merged
merged 13 commits into from
Oct 16, 2022
4 changes: 2 additions & 2 deletions pkg/controller/cyclenoderequest/transitioner/checks.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ func (t *CycleNodeRequestTransitioner) sendPreTerminationTrigger(node v1.CycleNo
}

// Send the trigger; the response body is only used when reporting an unexpected status code
statusCode, _, err := t.makeRequest(http.MethodPost, httpClient, endpoint)
statusCode, res, err := t.makeRequest(http.MethodPost, httpClient, endpoint)
if err != nil {
return fmt.Errorf("sending trigger failed: %v", err)
}
Expand All @@ -360,7 +360,7 @@ func (t *CycleNodeRequestTransitioner) sendPreTerminationTrigger(node v1.CycleNo
}

if !statusCodeFound {
return fmt.Errorf("got unexpected status code after sending trigger: %d", statusCode)
return fmt.Errorf("got unexpected status code after sending trigger: %d, resp: %s", statusCode, res)
}

now := metav1.Now()
Expand Down
14 changes: 6 additions & 8 deletions pkg/controller/cyclenoderequest/transitioner/transitions.go
Original file line number Diff line number Diff line change
Expand Up @@ -417,18 +417,16 @@ func (t *CycleNodeRequestTransitioner) transitionCordoning() (reconcile.Result,
for _, node := range t.cycleNodeRequest.Status.CurrentNodes {
// If the node is not already cordoned, cordon it
cordoned, err := k8s.IsCordoned(node.Name, t.rm.RawClient)
// Skip handling the node if it doesn't exist
if apierrors.IsNotFound(err) {
continue
}
if err != nil {
if apierrors.IsNotFound(err) {
continue
}
t.rm.Logger.Error(err, "failed to check if node is cordoned", "nodeName", node.Name)
return t.transitionToHealing(err)
}
if !cordoned {
if err := k8s.CordonNode(node.Name, t.rm.RawClient); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return t.transitionToHealing(err)
}
}
Expand All @@ -439,14 +437,14 @@ func (t *CycleNodeRequestTransitioner) transitionCordoning() (reconcile.Result,
// Try to send the trigger; if it has already been sent then this will
// be skipped in the function. The trigger must only be sent once.
if err := t.sendPreTerminationTrigger(node); err != nil {
return t.transitionToHealing(errors.Wrapf(err, "failed to send pre-termination trigger, %s is still cordononed", node.Name))
t.rm.LogEvent(t.cycleNodeRequest, "PreTerminationTriggerFailed", "failed to send pre-termination trigger to %v, err: %v", node.Name, err)
hyang200 marked this conversation as resolved.
Show resolved Hide resolved
}

// After the trigger has been sent, perform health checks to monitor if the node
// can be terminated. If all checks pass then it can be terminated.
allHealthChecksPassed, err := t.performPreTerminationHealthChecks(node)
if err != nil {
return t.transitionToHealing(errors.Wrapf(err, "failed to perform pre-termination health checks, %s is still cordononed", node.Name))
t.rm.LogEvent(t.cycleNodeRequest, "PreTerminationHealChecks", "failed to perform pre-termination health checks to %v, err: %v", node.Name, err)
hyang200 marked this conversation as resolved.
Show resolved Hide resolved
hyang200 marked this conversation as resolved.
Show resolved Hide resolved
}

// If not all health checks have passed, it is not ready for termination yet
Expand Down
5 changes: 0 additions & 5 deletions pkg/k8s/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"strings"

v1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -42,10 +41,6 @@ func UncordonNode(name string, client kubernetes.Interface) error {
func IsCordoned(name string, client kubernetes.Interface) (bool, error) {
node, err := client.CoreV1().Nodes().Get(context.TODO(), name, metav1.GetOptions{})
if err != nil {
// considered node is cordoned when it's removed from kube api
if apierrors.IsNotFound(err) {
return true, err
}
return false, err
}
return node.Spec.Unschedulable, nil
Expand Down