Skip to content

Commit

Permalink
scheduler: support pod preemption from numa awareless reservation (#2204)
Browse files Browse the repository at this point in the history

Signed-off-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
Co-authored-by: wangjianyu.wjy <wangjianyu.wjy@alibaba-inc.com>
  • Loading branch information
ZiMengSheng and wangjianyu.wjy authored Sep 18, 2024
1 parent beab44e commit cd6ce25
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
13 changes: 11 additions & 2 deletions pkg/scheduler/plugins/nodenumaresource/preempt.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ import (
"k8s.io/klog/v2"
"k8s.io/kubernetes/pkg/scheduler/framework"

"github.com/koordinator-sh/koordinator/pkg/scheduler/frameworkext"
"github.com/koordinator-sh/koordinator/pkg/util/cpuset"
)

Expand Down Expand Up @@ -141,7 +142,7 @@ func (p *Plugin) AddPod(_ context.Context, cycleState *framework.CycleState, pre
state.schedulingStateData.lock.Unlock()

rInfo := p.getPodNominatedReservationInfo(pod, nodeName)
if rInfo == nil { // preempt node unallocated resources
if rInfo == nil || p.notNUMAAwareReservation(rInfo) { // preempt node unallocated resources
if nodeState.nodeAlloc == nil {
nodeState.nodeAlloc = newPreemptibleAlloc()
}
Expand Down Expand Up @@ -197,7 +198,7 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState,
state.schedulingStateData.lock.Unlock()

rInfo := p.getPodNominatedReservationInfo(pod, nodeName)
if rInfo == nil { // preempt node unallocated resources
if rInfo == nil || p.notNUMAAwareReservation(rInfo) { // preempt node unallocated resources
if nodeState.nodeAlloc == nil {
nodeState.nodeAlloc = newPreemptibleAlloc()
}
Expand All @@ -219,6 +220,14 @@ func (p *Plugin) RemovePod(_ context.Context, cycleState *framework.CycleState,
return nil
}

// notNUMAAwareReservation reports whether the reservation described by rInfo
// has no NUMA-level allocation recorded. It looks up the allocation of
// rInfo.Pod on the reservation's node via p.getPodAllocated and returns true
// only when the returned CPU set is empty and no per-NUMA-node resources are
// present.
func (p *Plugin) notNUMAAwareReservation(rInfo *frameworkext.ReservationInfo) bool {
	cpus, numaResources := p.getPodAllocated(rInfo.Pod, rInfo.GetNodeName())
	return cpus.IsEmpty() && len(numaResources) == 0
}

func (p *Plugin) getPodAllocated(pod *corev1.Pod, nodeName string) (cpus cpuset.CPUSet, numaResources map[int]corev1.ResourceList) {
podAllocatedCPUs, ok := p.resourceManager.GetAllocatedCPUSet(nodeName, pod.UID)
if ok && !podAllocatedCPUs.IsEmpty() {
Expand Down
8 changes: 8 additions & 0 deletions pkg/scheduler/plugins/nodenumaresource/resource_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,18 +197,26 @@ func (c *resourceManager) Allocate(node *corev1.Node, pod *corev1.Pod, options *
Name: pod.Name,
CPUExclusivePolicy: options.cpuExclusivePolicy,
}
klog.V(5).Infof("Allocate pod %s/%s on node %s, numaNodeAffinity: %+v, requestCPUBind %v", pod.Namespace, pod.Name, node.Name, options.hint, options.requestCPUBind)
if options.hint.NUMANodeAffinity != nil {
resources, err := c.allocateResourcesByHint(node, pod, options)
if err != nil {
return nil, err
}
if len(resources) == 0 {
klog.Warningf("succeed allocateResourcesByHint but allocatedNUMAResources nil, options: %+v", options)
}
allocation.NUMANodeResources = resources

}
if options.requestCPUBind {
cpus, err := c.allocateCPUSet(node, pod, allocation.NUMANodeResources, options)
if err != nil {
return nil, framework.NewStatus(framework.Unschedulable, err.Error())
}
if cpus.IsEmpty() {
klog.Warningf("succeed allocateCPUSet but allocatedCPUs empty, options: %+v, allocation.NUMANodeResources: %+v", options, allocation.NUMANodeResources)
}
allocation.CPUSet = cpus
}
return allocation, nil
Expand Down

0 comments on commit cd6ce25

Please sign in to comment.