diff --git a/pkg/scheduler/api/node_info.go b/pkg/scheduler/api/node_info.go index 59edb3e8fa..2f4d3bc482 100644 --- a/pkg/scheduler/api/node_info.go +++ b/pkg/scheduler/api/node_info.go @@ -295,17 +295,34 @@ func (ni *NodeInfo) setNodeGPUInfo(node *v1.Node) { // SetNode sets kubernetes node object to nodeInfo object func (ni *NodeInfo) SetNode(node *v1.Node) { - ni.setOversubscription(node) ni.setNodeState(node) - ni.setNodeGPUInfo(node) - ni.setRevocableZone(node) - if !ni.Ready() { - klog.Warningf("Failed to set node info, phase: %s, reason: %s", - ni.State.Phase, ni.State.Reason) + klog.Warningf("Failed to set node info for %s, phase: %s, reason: %s", + ni.Name, ni.State.Phase, ni.State.Reason) + return + } + + // Dry run, make sure all fields other than `State` are in the original state. + copy := ni.Clone() + copy.setNode(node) + copy.setNodeState(node) + if !copy.Ready() { + klog.Warningf("SetNode makes node %s not ready, phase: %s, reason: %s", + copy.Name, copy.State.Phase, copy.State.Reason) + // Set state of node to !Ready, left other fields untouched + ni.State = copy.State return } + ni.setNode(node) +} + +// setNode sets kubernetes node object to nodeInfo object without assertion +func (ni *NodeInfo) setNode(node *v1.Node) { + ni.setOversubscription(node) + ni.setNodeGPUInfo(node) + ni.setRevocableZone(node) + ni.Name = node.Name ni.Node = node @@ -319,14 +336,14 @@ func (ni *NodeInfo) SetNode(node *v1.Node) { for _, ti := range ni.Tasks { switch ti.Status { case Releasing: - ni.Idle.Sub(ti.Resreq) + ni.Idle.sub(ti.Resreq) // sub without assertion ni.Releasing.Add(ti.Resreq) ni.Used.Add(ti.Resreq) ni.AddGPUResource(ti.Pod) case Pipelined: ni.Pipelined.Add(ti.Resreq) default: - ni.Idle.Sub(ti.Resreq) + ni.Idle.sub(ti.Resreq) // sub without assertion ni.Used.Add(ti.Resreq) ni.AddGPUResource(ti.Pod) } diff --git a/pkg/scheduler/api/node_info_test.go b/pkg/scheduler/api/node_info_test.go index 978ef7c5c7..c14e3c3a46 100644 --- a/pkg/scheduler/api/node_info_test.go +++ b/pkg/scheduler/api/node_info_test.go @@ -167,3 +167,69 @@ func TestNodeInfo_RemovePod(t *testing.T) { } } } + +func TestNodeInfo_SetNode(t *testing.T) { + // case1 + case01Node1 := buildNode("n1", buildResourceList("10", "10G")) + case01Node2 := buildNode("n1", buildResourceList("8", "8G")) + case01Pod1 := buildPod("c1", "p1", "n1", v1.PodRunning, buildResourceList("1", "1G"), []metav1.OwnerReference{}, make(map[string]string)) + case01Pod2 := buildPod("c1", "p2", "n1", v1.PodRunning, buildResourceList("2", "2G"), []metav1.OwnerReference{}, make(map[string]string)) + case01Pod3 := buildPod("c1", "p3", "n1", v1.PodRunning, buildResourceList("6", "6G"), []metav1.OwnerReference{}, make(map[string]string)) + + tests := []struct { + name string + node *v1.Node + updated *v1.Node + pods []*v1.Pod + expected *NodeInfo + }{ + { + name: "add 3 running non-owner pod", + node: case01Node1, + updated: case01Node2, + pods: []*v1.Pod{case01Pod1, case01Pod2, case01Pod3}, + expected: &NodeInfo{ + Name: "n1", + Node: case01Node1, + Idle: buildResource("1", "1G"), + Used: buildResource("9", "9G"), + OversubscriptionResource: EmptyResource(), + Releasing: EmptyResource(), + Pipelined: EmptyResource(), + Allocatable: buildResource("10", "10G"), + Capability: buildResource("10", "10G"), + State: NodeState{Phase: NotReady, Reason: "OutOfSync"}, + Tasks: map[TaskID]*TaskInfo{ + "c1/p1": NewTaskInfo(case01Pod1), + "c1/p2": NewTaskInfo(case01Pod2), + "c1/p3": NewTaskInfo(case01Pod3), + }, + GPUDevices: make(map[int]*GPUDevice), + }, + }, + } + + for i, test := range tests { + ni := NewNodeInfo(test.node) + for _, pod := range test.pods { + pi := NewTaskInfo(pod) + ni.AddTask(pi) + ni.Name = pod.Spec.NodeName + } + + // OutOfSync. e.g.: nvidia-device-plugin is down causes gpus turn from 8 to 0 (node.status.allocatable."nvidia.com/gpu": 0) + ni.SetNode(test.updated) + if !nodeInfoEqual(ni, test.expected) { + t.Errorf("node info %d: \n expected\t%v, \n got\t\t%v \n", + i, test.expected, ni) + } + + // Recover. e.g.: nvidia-device-plugin is restarted successfully + ni.SetNode(test.node) + test.expected.State = NodeState{Phase: Ready} + if !nodeInfoEqual(ni, test.expected) { + t.Errorf("recovered %d: \n expected\t%v, \n got\t\t%v \n", + i, test.expected, ni) + } + } +} diff --git a/pkg/scheduler/api/resource_info.go b/pkg/scheduler/api/resource_info.go index 7101ec82a3..8cd8999aa6 100644 --- a/pkg/scheduler/api/resource_info.go +++ b/pkg/scheduler/api/resource_info.go @@ -197,10 +197,14 @@ func (r *Resource) Add(rr *Resource) *Resource { return r } -//Sub subtracts two Resource objects. +// Sub subtracts two Resource objects with assertion. func (r *Resource) Sub(rr *Resource) *Resource { assert.Assertf(rr.LessEqual(r, Zero), "resource is not sufficient to do operation: <%v> sub <%v>", r, rr) + return r.sub(rr) +} +// sub subtracts two Resource objects. +func (r *Resource) sub(rr *Resource) *Resource { r.MilliCPU -= rr.MilliCPU r.Memory -= rr.Memory