-
Notifications
You must be signed in to change notification settings - Fork 4.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Bug 1550266 - Fix clearInitialNodeNetworkUnavailableCondition() in sdn master #18758
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -190,55 +190,46 @@ func (master *OsdnMaster) deleteNode(nodeName string) error { | |
// TODO: make upstream kubelet more flexible with overlays and GCE so this | ||
// condition doesn't get added for network plugins that don't want it, and then | ||
// we can remove this function. | ||
func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(node *kapi.Node) { | ||
func (master *OsdnMaster) clearInitialNodeNetworkUnavailableCondition(origNode *kapi.Node) { | ||
// Informer cache should not be mutated, so get a copy of the object | ||
node := origNode.DeepCopy() | ||
knode := node | ||
cleared := false | ||
resultErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error { | ||
var err error | ||
|
||
if knode != node { | ||
knode, err = master.kClient.Core().Nodes().Get(node.ObjectMeta.Name, metav1.GetOptions{}) | ||
knode, err = master.kClient.Core().Nodes().Get(node.Name, metav1.GetOptions{}) | ||
if err != nil { | ||
return err | ||
} | ||
} | ||
|
||
// Let caller modify knode's status, then push to api server. | ||
_, condition := GetNodeCondition(&node.Status, kapi.NodeNetworkUnavailable) | ||
if condition != nil && condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" { | ||
condition.Status = kapi.ConditionFalse | ||
condition.Reason = "RouteCreated" | ||
condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated" | ||
condition.LastTransitionTime = metav1.Now() | ||
knode, err = master.kClient.Core().Nodes().UpdateStatus(knode) | ||
if err == nil { | ||
cleared = true | ||
for i := range knode.Status.Conditions { | ||
if knode.Status.Conditions[i].Type == kapi.NodeNetworkUnavailable { | ||
condition := &knode.Status.Conditions[i] | ||
if condition.Status != kapi.ConditionFalse && condition.Reason == "NoRouteCreated" { | ||
condition.Status = kapi.ConditionFalse | ||
condition.Reason = "RouteCreated" | ||
condition.Message = "openshift-sdn cleared kubelet-set NoRouteCreated" | ||
condition.LastTransitionTime = metav1.Now() | ||
|
||
if knode, err = master.kClient.Core().Nodes().UpdateStatus(knode); err == nil { | ||
cleared = true | ||
} | ||
} | ||
break | ||
} | ||
} | ||
return err | ||
}) | ||
if resultErr != nil { | ||
utilruntime.HandleError(fmt.Errorf("status update failed for local node: %v", resultErr)) | ||
} else if cleared { | ||
glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.ObjectMeta.Name) | ||
glog.Infof("Cleared node NetworkUnavailable/NoRouteCreated condition for %s", node.Name) | ||
} | ||
} | ||
|
||
// TODO remove this and switch to external | ||
// GetNodeCondition extracts the provided condition from the given status and returns that. | ||
// Returns nil and -1 if the condition is not present, and the index of the located condition. | ||
func GetNodeCondition(status *kapi.NodeStatus, conditionType kapi.NodeConditionType) (int, *kapi.NodeCondition) { | ||
if status == nil { | ||
return -1, nil | ||
} | ||
for i := range status.Conditions { | ||
if status.Conditions[i].Type == conditionType { | ||
return i, &status.Conditions[i] | ||
} | ||
} | ||
return -1, nil | ||
} | ||
|
||
func (master *OsdnMaster) watchNodes() { | ||
funcs := common.InformerFuncs(&kapi.Node{}, master.handleAddOrUpdateNode, master.handleDeleteNode) | ||
master.kubeInformers.Core().InternalVersion().Nodes().Informer().AddEventHandler(funcs) | ||
|
@@ -258,14 +249,15 @@ func (master *OsdnMaster) handleAddOrUpdateNode(obj, _ interface{}, eventType wa | |
utilruntime.HandleError(fmt.Errorf("Node IP is not set for node %s, skipping %s event, node: %v", node.Name, eventType, node)) | ||
return | ||
} | ||
master.clearInitialNodeNetworkUnavailableCondition(node) | ||
|
||
if oldNodeIP, ok := master.hostSubnetNodeIPs[node.UID]; ok && (nodeIP == oldNodeIP) { | ||
return | ||
} | ||
// Node status is frequently updated by kubelet, so log only if the above condition is not met | ||
glog.V(5).Infof("Watch %s event for Node %q", eventType, node.Name) | ||
|
||
master.clearInitialNodeNetworkUnavailableCondition(node) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Before we do this, we should make sure that we don't run these controllers at all: kubernetes/pkg/controller/cloud/node_controller.go since they both will set NodeNetworkUnavailable on the node in addition to kubelet. I don't think we run the route controller, but I'm not sure about the node controller. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hm... oh, yeah, for some reason I was thinking we'd still run clearInitialNodeNetworkUnavailableCondition on any "real" Node change, just not on the "Node status is frequently updated by kubelet" changes. But I guess this makes it so we only run clearInitialNodeNetworkUnavailableCondition when the IP changes, which is riskier. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In OpenShift we do not run CloudNodeController (kubernetes/pkg/controller/cloud/node_controller.go), RouteController (kubernetes/pkg/controller/route/route_controller.go), or the other kubernetes/pkg/controller/node/ipam/{sync, adapter, cloud_cidr_allocator} controllers, which are where the NodeNetworkUnavailable condition is used. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Created bug: https://bugzilla.redhat.com/show_bug.cgi?id=1550266 to ensure there are no issues/regressions on GCP with this change. |
||
|
||
usedNodeIP, err := master.addNode(node.Name, string(node.UID), nodeIP, nil) | ||
if err != nil { | ||
utilruntime.HandleError(fmt.Errorf("Error creating subnet for node %s, ip %s: %v", node.Name, nodeIP, err)) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
With this implementation, 'condition' is a copy of knode.Status.Conditions[i], so modifying 'condition' will not be reflected in knode.Status.Conditions.
In this case, we do want the changes to the condition.{Status, Reason, ...} fields to land in knode.Status.Conditions itself.