Skip to content

Commit

Permalink
[KEP-1224] Feature LendingLimit provides a guaranteed resource quota …
Browse files Browse the repository at this point in the history
…for user. (kubernetes-sigs#1385)

Signed-off-by: B1F030 <b1fzhang@gmail.com>
Co-authored-by: kerthcet <kerthcet@gmail.com>
  • Loading branch information
2 people authored and kannon92 committed Nov 19, 2024
1 parent 20db5b7 commit 60cf01b
Show file tree
Hide file tree
Showing 31 changed files with 1,341 additions and 198 deletions.
17 changes: 15 additions & 2 deletions apis/kueue/v1beta1/clusterqueue_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ type ClusterQueueSpec struct {
QueueingStrategy QueueingStrategy `json:"queueingStrategy,omitempty"`

// namespaceSelector defines which namespaces are allowed to submit workloads to
// this clusterQueue. Beyond this basic support for policy, an policy agent like
// this clusterQueue. Beyond this basic support for policy, a policy agent like
// Gatekeeper should be used to enforce more advanced policies.
// Defaults to null which is a nothing selector (no namespaces eligible).
// If set to an empty selector `{}`, then all namespaces are eligible.
Expand Down Expand Up @@ -97,7 +97,7 @@ type ClusterQueueSpec struct {
// +optional
AdmissionChecks []string `json:"admissionChecks,omitempty"`

// stopPolicy - if set to a value different than None, the ClusterQueue is considered Inactive, no new reservation being
// stopPolicy - if set to a value different from None, the ClusterQueue is considered Inactive, no new reservation being
// made.
//
// Depending on its value, its associated workloads will:
Expand Down Expand Up @@ -201,6 +201,19 @@ type ResourceQuota struct {
// borrowingLimit must be null if spec.cohort is empty.
// +optional
BorrowingLimit *resource.Quantity `json:"borrowingLimit,omitempty"`

// lendingLimit is the maximum amount of unused quota for the [flavor, resource]
// combination that this ClusterQueue can lend to other ClusterQueues in the same cohort.
// In total, at a given time, ClusterQueue reserves for its exclusive use
// a quantity of quota equal to nominalQuota - lendingLimit.
// If null, it means that there is no lending limit, meaning that
// all the nominalQuota can be borrowed by other clusterQueues in the cohort.
// If not null, it must be non-negative.
// lendingLimit must be null if spec.cohort is empty.
// This field is in alpha stage. To be able to use this field,
// enable the feature gate LendingLimit, which is disabled by default.
// +optional
LendingLimit *resource.Quantity `json:"lendingLimit,omitempty"`
}

// ResourceFlavorReference is the name of the ResourceFlavor.
Expand Down
5 changes: 5 additions & 0 deletions apis/kueue/v1beta1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions charts/kueue/templates/crd/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ spec:
namespaceSelector:
description: namespaceSelector defines which namespaces are allowed
to submit workloads to this clusterQueue. Beyond this basic support
for policy, an policy agent like Gatekeeper should be used to enforce
for policy, a policy agent like Gatekeeper should be used to enforce
more advanced policies. Defaults to null which is a nothing selector
(no namespaces eligible). If set to an empty selector `{}`, then
all namespaces are eligible.
Expand Down Expand Up @@ -309,6 +309,26 @@ spec:
empty.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
lendingLimit:
anyOf:
- type: integer
- type: string
description: lendingLimit is the maximum amount
of unused quota for the [flavor, resource] combination
that this ClusterQueue can lend to other ClusterQueues
in the same cohort. In total, at a given time,
ClusterQueue reserves for its exclusive use a
quantity of quota equal to nominalQuota - lendingLimit.
If null, it means that there is no lending limit,
meaning that all the nominalQuota can be borrowed
by other clusterQueues in the cohort. If not null,
it must be non-negative. lendingLimit must be
null if spec.cohort is empty. This field is in
alpha stage. To be able to use this field, enable
the feature gate LendingLimit, which is disabled
by default.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
name:
description: name of this resource.
type: string
Expand Down Expand Up @@ -361,7 +381,7 @@ spec:
x-kubernetes-list-type: atomic
stopPolicy:
default: None
description: "stopPolicy - if set to a value different than None,
description: "stopPolicy - if set to a value different from None,
the ClusterQueue is considered Inactive, no new reservation being
made. \n Depending on its value, its associated workloads will:
\n - None - Workloads are admitted - HoldAndDrain - Admitted workloads
Expand Down
9 changes: 9 additions & 0 deletions client-go/applyconfiguration/kueue/v1beta1/resourcequota.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 22 additions & 2 deletions config/components/crd/bases/kueue.x-k8s.io_clusterqueues.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ spec:
namespaceSelector:
description: namespaceSelector defines which namespaces are allowed
to submit workloads to this clusterQueue. Beyond this basic support
for policy, an policy agent like Gatekeeper should be used to enforce
for policy, a policy agent like Gatekeeper should be used to enforce
more advanced policies. Defaults to null which is a nothing selector
(no namespaces eligible). If set to an empty selector `{}`, then
all namespaces are eligible.
Expand Down Expand Up @@ -296,6 +296,26 @@ spec:
empty.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
lendingLimit:
anyOf:
- type: integer
- type: string
description: lendingLimit is the maximum amount
of unused quota for the [flavor, resource] combination
that this ClusterQueue can lend to other ClusterQueues
in the same cohort. In total, at a given time,
ClusterQueue reserves for its exclusive use a
quantity of quota equal to nominalQuota - lendingLimit.
If null, it means that there is no lending limit,
meaning that all the nominalQuota can be borrowed
by other clusterQueues in the cohort. If not null,
it must be non-negative. lendingLimit must be
null if spec.cohort is empty. This field is in
alpha stage. To be able to use this field, enable
the feature gate LendingLimit, which is disabled
by default.
pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$
x-kubernetes-int-or-string: true
name:
description: name of this resource.
type: string
Expand Down Expand Up @@ -348,7 +368,7 @@ spec:
x-kubernetes-list-type: atomic
stopPolicy:
default: None
description: "stopPolicy - if set to a value different than None,
description: "stopPolicy - if set to a value different from None,
the ClusterQueue is considered Inactive, no new reservation being
made. \n Depending on its value, its associated workloads will:
\n - None - Workloads are admitted - HoldAndDrain - Admitted workloads
Expand Down
Binary file added keps/1224-lending-limit/LendingLimit.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
10 changes: 7 additions & 3 deletions keps/1224-lending-limit/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ So we need a reservation design for resource requests and security reasons: `Len

In this proposal, `LendingLimit` is defined. A `ClusterQueue` can lend at most the specified quota to other ClusterQueues in the same cohort.

![Semantics](LendingLimit.png "Semantics of lendingLimit")

### User Stories (Optional)

#### Story 1
Expand Down Expand Up @@ -144,9 +146,11 @@ After the implementation PR is merged, add the names of the tests here.
- When cq-a's BorrowingLimit is unset, cq-a can borrow as much as `(cq-b's LendingLimit + cq-c's LendingLimit)`.
- When cq-a's BorrowingLimit is set, cq-a can borrow as much as `min((cq-b's LendingLimit + cq-c's LendingLimit), cq-a's BorrowingLimit)`.
- In a cohort with 2 ClusterQueues cq-a, cq-b and 2 ResourceFlavors rf-a, rf-b:
- When rf-b's LendingLimit set, and cq-a's FlavorFungibility set to `WhenCanBorrow: TryNextFlavor`:
- When rf-a's BorrowingLimit unset, cq-a can borrow as much as `rf-b's LendingLimit`.
- When rf-a's BorrowingLimit set, cq-a can borrow as much as `min(rf-b's LendingLimit, rf-a's BorrowingLimit)`.
- In cq-b, when rf-a's LendingLimit is set, and cq-a's FlavorFungibility is set to `whenCanBorrow: Borrow`:
- In cq-a, when rf-a's BorrowingLimit is unset, cq-a can borrow as much as `rf-a's LendingLimit`.
- In cq-a, when rf-a's BorrowingLimit is set, cq-a can borrow as much as `min(rf-a's LendingLimit, rf-a's BorrowingLimit)`.

We do not consider the case **where cq-a's FlavorFungibility is set to `whenCanBorrow: TryNextFlavor`**, since in that case no borrowing happens.

### Graduation Criteria

Expand Down
22 changes: 16 additions & 6 deletions pkg/cache/cache_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

kueue "sigs.k8s.io/kueue/apis/kueue/v1beta1"
"sigs.k8s.io/kueue/pkg/features"
utiltesting "sigs.k8s.io/kueue/pkg/util/testing"
"sigs.k8s.io/kueue/pkg/workload"
)
Expand Down Expand Up @@ -89,11 +90,12 @@ func TestCacheClusterQueueOperations(t *testing.T) {
return nil
}
cases := []struct {
name string
operation func(*Cache) error
clientObbjects []client.Object
wantClusterQueues map[string]*ClusterQueue
wantCohorts map[string]sets.Set[string]
name string
operation func(*Cache) error
clientObbjects []client.Object
wantClusterQueues map[string]*ClusterQueue
wantCohorts map[string]sets.Set[string]
enableLendingLimit bool
}{
{
name: "add",
Expand Down Expand Up @@ -397,7 +399,7 @@ func TestCacheClusterQueueOperations(t *testing.T) {
*utiltesting.MakeClusterQueue("e").
ResourceGroup(
*utiltesting.MakeFlavorQuotas("default").
Resource(corev1.ResourceCPU, "5", "5").
Resource(corev1.ResourceCPU, "5", "5", "4").
Obj()).
Cohort("two").
NamespaceSelector(nil).
Expand Down Expand Up @@ -484,11 +486,17 @@ func TestCacheClusterQueueOperations(t *testing.T) {
corev1.ResourceCPU: {
Nominal: 5_000,
BorrowingLimit: ptr.To[int64](5_000),
LendingLimit: ptr.To[int64](4_000),
},
}},
},
LabelKeys: sets.New("cpuType", "region"),
}},
GuaranteedQuota: FlavorResourceQuantities{
"default": {
"cpu": 1_000,
},
},
NamespaceSelector: labels.Nothing(),
FlavorFungibility: defaultFlavorFungibility,
Usage: FlavorResourceQuantities{
Expand Down Expand Up @@ -518,6 +526,7 @@ func TestCacheClusterQueueOperations(t *testing.T) {
"one": sets.New("b"),
"two": sets.New("a", "c", "e", "f"),
},
enableLendingLimit: true,
},
{
name: "delete",
Expand Down Expand Up @@ -1039,6 +1048,7 @@ func TestCacheClusterQueueOperations(t *testing.T) {

for _, tc := range cases {
t.Run(tc.name, func(t *testing.T) {
defer features.SetFeatureGateDuringTest(t, features.LendingLimit, tc.enableLendingLimit)()
cache := New(utiltesting.NewFakeClient(tc.clientObbjects...))
if err := tc.operation(cache); err != nil {
t.Errorf("Unexpected error during test operation: %s", err)
Expand Down
Loading

0 comments on commit 60cf01b

Please sign in to comment.