Skip to content

Commit

Permalink
cherry-pick kubernetes-sigs#3536
Browse files Browse the repository at this point in the history
  • Loading branch information
gabesaba committed Nov 15, 2024
1 parent 982a9f3 commit 3d6ead8
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 62 deletions.
14 changes: 14 additions & 0 deletions pkg/cache/clusterqueue_snapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,20 @@ func (c *ClusterQueueSnapshot) Available(fr resources.FlavorResource) int64 {
return max(0, capacityAvailable)
}

// PotentialAvailable returns the upper bound on how much of fr this
// ClusterQueue could obtain, irrespective of current usage.
//
// Without a cohort the result is the ClusterQueue's nominal quota. Within a
// cohort the result is the requestable cohort quota, lowered to
// nominal + borrowing limit when a borrowing limit is set, and never below 0.
func (c *ClusterQueueSnapshot) PotentialAvailable(fr resources.FlavorResource) int64 {
	// No cohort means there is nothing to borrow from.
	if c.Cohort == nil {
		return c.nominal(fr)
	}

	ceiling := c.RequestableCohortQuota(fr)

	// A configured borrowing limit caps the ceiling at nominal + limit.
	limit := c.borrowingLimit(fr)
	if limit != nil {
		if capped := c.nominal(fr) + *limit; capped < ceiling {
			ceiling = capped
		}
	}

	// Never report a negative capacity.
	if ceiling < 0 {
		return 0
	}
	return ceiling
}

func (c *ClusterQueueSnapshot) nominal(fr resources.FlavorResource) int64 {
if quota := c.QuotaFor(fr); quota != nil {
return quota.Nominal
Expand Down
4 changes: 4 additions & 0 deletions pkg/resources/requests.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,7 @@ func ResourceQuantity(name corev1.ResourceName, v int64) resource.Quantity {
return *resource.NewQuantity(v, resource.DecimalSI)
}
}
// ResourceQuantityString builds the quantity for the resource name and the
// raw value v via ResourceQuantity and returns its string representation.
func ResourceQuantityString(name corev1.ResourceName, v int64) string {
	// The value is held in a local so that it is addressable for the String call.
	quantity := ResourceQuantity(name, v)
	return (&quantity).String()
}
65 changes: 23 additions & 42 deletions pkg/scheduler/flavorassigner/flavorassigner.go
Original file line number Diff line number Diff line change
Expand Up @@ -598,56 +598,37 @@ func flavorSelector(spec *corev1.PodSpec, allowedKeys sets.Set[string]) nodeaffi
// fitsResourceQuota decides how a request of val for the flavor-resource fr
// relates to the quota of the assigner's ClusterQueue.
//
// It returns the granular mode (fit, reclaim, preempt or noFit), a flag
// telling whether admitting the request implies borrowing, and a *Status that
// carries a human-readable reason. The Status is nil only when the request
// fits outright.
//
// NOTE(review): this listing looks like a rendered diff whose +/- markers were
// lost, so lines of the previous and the new implementation appear
// interleaved (for instance `mode` is declared with := twice and the braces do
// not balance). Resolve the listing against the real file before relying on
// the statement order shown here.
func (a *FlavorAssigner) fitsResourceQuota(log logr.Logger, fr resources.FlavorResource, val int64, rQuota *cache.ResourceQuota) (granularMode, bool, *Status) {
var status Status
var borrow bool
used := a.cq.Usage[fr]
mode := noFit
if val <= rQuota.Nominal {
// The request can be satisfied by the nominal quota, assuming quota is
// reclaimed from the cohort or assuming all active workloads in the
// ClusterQueue are preempted.
mode = preempt
}
cohortAvailable := rQuota.Nominal
if a.cq.Cohort != nil {
cohortAvailable = a.cq.RequestableCohortQuota(fr)
}

if a.canPreemptWhileBorrowing() {
// when preemption with borrowing is enabled, we can succeed to admit the
// workload if preemption is used.
if (rQuota.BorrowingLimit == nil || val <= rQuota.Nominal+*rQuota.BorrowingLimit) && val <= cohortAvailable {
mode = preempt
borrow = val > rQuota.Nominal
}
}
if rQuota.BorrowingLimit != nil && used+val > rQuota.Nominal+*rQuota.BorrowingLimit {
status.append(fmt.Sprintf("borrowing limit for %s in flavor %s exceeded", fr.Resource, fr.Flavor))
return mode, borrow, &status
}

if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) {
mode = reclaim
}
// available and maxCapacity come from the ClusterQueue snapshot; presumably
// available is what is free right now and maxCapacity is the most the
// ClusterQueue could ever obtain (see PotentialAvailable) — TODO confirm.
available := a.cq.Available(fr)
maxCapacity := a.cq.PotentialAvailable(fr)

cohortUsed := used
if a.cq.Cohort != nil {
cohortUsed = a.cq.UsedCohortQuota(fr)
// No Fit
// The request exceeds maxCapacity, so it is rejected with both quantities
// included in the message.
if val > maxCapacity {
status.append(fmt.Sprintf("insufficient quota for %s in flavor %s, request > maximum capacity (%s > %s)",
fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val), resources.ResourceQuantityString(fr.Resource, maxCapacity)))
return noFit, false, &status
}

lack := cohortUsed + val - cohortAvailable
if lack <= 0 {
return fit, used+val > rQuota.Nominal, nil
// Fit
// The borrow flag is true when current usage plus the request goes above
// the nominal quota.
if val <= available {
return fit, a.cq.Usage[fr]+val > rQuota.Nominal, nil
}

lackQuantity := resources.ResourceQuantity(fr.Resource, lack)
msg := fmt.Sprintf("insufficient unused quota in cohort for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity)
if a.cq.Cohort == nil {
if mode == noFit {
msg = fmt.Sprintf("insufficient quota for %s in flavor %s in ClusterQueue", fr.Resource, fr.Flavor)
} else {
msg = fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed", fr.Resource, fr.Flavor, &lackQuantity)
// Check if preemption is possible
// Within nominal quota: preempt, upgraded to reclaim when the oracle says
// reclaim is possible. Above nominal: preempt with borrowing, only when
// canPreemptWhileBorrowing reports true; otherwise the mode stays noFit.
mode := noFit
if val <= rQuota.Nominal {
mode = preempt
if a.oracle.IsReclaimPossible(log, a.cq, *a.wl, fr, val) {
mode = reclaim
}
} else if a.canPreemptWhileBorrowing() {
mode = preempt
borrow = true
}
status.append(msg)

// The reported shortfall is the request minus what is currently available.
status.append(fmt.Sprintf("insufficient unused quota for %s in flavor %s, %s more needed",
fr.Resource, fr.Flavor, resources.ResourceQuantityString(fr.Resource, val-available)))

return mode, borrow, &status
}

Expand Down
33 changes: 16 additions & 17 deletions pkg/scheduler/flavorassigner/flavorassigner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ package flavorassigner

import (
"context"
"fmt"
"testing"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -259,7 +258,7 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for memory in flavor b_one in ClusterQueue",
"insufficient quota for memory in flavor b_one, request > maximum capacity (10Mi > 1Mi)",
},
},
Count: 1,
Expand Down Expand Up @@ -373,9 +372,9 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient unused quota in cohort for cpu in flavor one, 1 more needed",
"insufficient unused quota in cohort for memory in flavor two, 5Mi more needed",
"insufficient unused quota in cohort for example.com/gpu in flavor b_one, 1 more needed",
"insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)",
"insufficient unused quota for memory in flavor two, 5Mi more needed",
"insufficient unused quota for example.com/gpu in flavor b_one, 1 more needed",
},
},
Count: 1,
Expand Down Expand Up @@ -414,8 +413,8 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for cpu in flavor one in ClusterQueue",
"insufficient quota for memory in flavor two in ClusterQueue",
"insufficient quota for cpu in flavor one, request > maximum capacity (3 > 2)",
"insufficient quota for memory in flavor two, request > maximum capacity (10Mi > 5Mi)",
},
},
Count: 1,
Expand Down Expand Up @@ -839,7 +838,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: resource.MustParse("2"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -882,7 +881,7 @@ func TestAssignFlavors(t *testing.T) {
},

Status: &Status{
reasons: []string{"borrowing limit for cpu in flavor one exceeded"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -960,7 +959,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: resource.MustParse("2"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 2 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 2 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -1077,7 +1076,7 @@ func TestAssignFlavors(t *testing.T) {
},
Status: &Status{
reasons: []string{
"insufficient quota for cpu in flavor one in ClusterQueue",
"insufficient quota for cpu in flavor one, request > maximum capacity (12 > 4)",
"insufficient unused quota for cpu in flavor tainted, 3 more needed",
},
},
Expand Down Expand Up @@ -1173,7 +1172,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourcePods: resource.MustParse("3"),
},
Status: &Status{
reasons: []string{fmt.Sprintf("insufficient quota for %s in flavor default in ClusterQueue", corev1.ResourcePods)},
reasons: []string{"insufficient quota for pods in flavor default, request > maximum capacity (3 > 2)"},
},
Count: 3,
}},
Expand Down Expand Up @@ -1510,7 +1509,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1565,7 +1564,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1664,7 +1663,7 @@ func TestAssignFlavors(t *testing.T) {
{
Name: "main",
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 11 more needed"},
reasons: []string{"insufficient quota for cpu in flavor one, request > maximum capacity (12 > 11)"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down Expand Up @@ -1821,7 +1820,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourcePods: resource.MustParse("1"),
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 1 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 1 more needed"},
},
Count: 1,
}},
Expand Down Expand Up @@ -1868,7 +1867,7 @@ func TestAssignFlavors(t *testing.T) {
corev1.ResourceCPU: {Name: "one", Mode: Preempt, TriedFlavorIdx: 0},
},
Status: &Status{
reasons: []string{"insufficient unused quota in cohort for cpu in flavor one, 10 more needed"},
reasons: []string{"insufficient unused quota for cpu in flavor one, 10 more needed"},
},
Requests: corev1.ResourceList{
corev1.ResourceCPU: resource.MustParse("12"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for cpu in flavor default, 2 more needed",
Message: "couldn't assign flavors to pod set main: insufficient quota for cpu in flavor default, request > maximum capacity (10 > 8)",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down
4 changes: 2 additions & 2 deletions test/integration/scheduler/scheduler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2294,7 +2294,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 1Gi more needed",
Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down Expand Up @@ -2336,7 +2336,7 @@ var _ = ginkgo.Describe("Scheduler", func() {
Type: kueue.WorkloadQuotaReserved,
Status: metav1.ConditionFalse,
Reason: "Pending",
Message: "couldn't assign flavors to pod set main: insufficient unused quota in cohort for memory in flavor on-demand, 2Gi more needed",
Message: "couldn't assign flavors to pod set main: insufficient unused quota for memory in flavor on-demand, 1Gi more needed",
}, util.IgnoreConditionTimestampsAndObservedGeneration),
))
}, util.Timeout, util.Interval).Should(gomega.Succeed())
Expand Down

0 comments on commit 3d6ead8

Please sign in to comment.