diff --git a/apis/extension/elastic_quota.go b/apis/extension/elastic_quota.go index a7f4f4a8f..f05c819a0 100644 --- a/apis/extension/elastic_quota.go +++ b/apis/extension/elastic_quota.go @@ -39,6 +39,7 @@ const ( LabelQuotaIsRoot = QuotaKoordinatorPrefix + "/is-root" LabelQuotaTreeID = QuotaKoordinatorPrefix + "/tree-id" LabelQuotaIgnoreDefaultTree = QuotaKoordinatorPrefix + "/ignore-default-tree" + LabelPreemptible = QuotaKoordinatorPrefix + "/preemptible" AnnotationSharedWeight = QuotaKoordinatorPrefix + "/shared-weight" AnnotationRuntime = QuotaKoordinatorPrefix + "/runtime" AnnotationRequest = QuotaKoordinatorPrefix + "/request" @@ -70,6 +71,10 @@ func IsTreeRootQuota(quota *v1alpha1.ElasticQuota) bool { return quota.Labels[LabelQuotaIsRoot] == "true" } +func IsPodNonPreemptible(pod *corev1.Pod) bool { + return pod.Labels[LabelPreemptible] == "false" +} + func GetQuotaTreeID(quota *v1alpha1.ElasticQuota) string { return quota.Labels[LabelQuotaTreeID] } diff --git a/pkg/scheduler/plugins/elasticquota/core/group_quota_manager.go b/pkg/scheduler/plugins/elasticquota/core/group_quota_manager.go index 290d5606e..e1541af16 100644 --- a/pkg/scheduler/plugins/elasticquota/core/group_quota_manager.go +++ b/pkg/scheduler/plugins/elasticquota/core/group_quota_manager.go @@ -224,7 +224,7 @@ func (gqm *GroupQuotaManager) recursiveUpdateGroupTreeWithDeltaRequest(deltaReq // updateGroupDeltaUsedNoLock updates the usedQuota of a node, it also updates all parent nodes // no need to lock gqm.hierarchyUpdateLock -func (gqm *GroupQuotaManager) updateGroupDeltaUsedNoLock(quotaName string, delta v1.ResourceList) { +func (gqm *GroupQuotaManager) updateGroupDeltaUsedNoLock(quotaName string, delta, deltaNonPreemptibleUsed v1.ResourceList) { curToAllParInfos := gqm.getCurToAllParentGroupQuotaInfoNoLock(quotaName) allQuotaInfoLen := len(curToAllParInfos) if allQuotaInfoLen <= 0 { @@ -234,7 +234,7 @@ func (gqm *GroupQuotaManager) updateGroupDeltaUsedNoLock(quotaName string, delta defer gqm.scopedLockForQuotaInfo(curToAllParInfos)() for i := 0; i < allQuotaInfoLen; i++ { quotaInfo := curToAllParInfos[i] - quotaInfo.addUsedNonNegativeNoLock(delta) + quotaInfo.addUsedNonNegativeNoLock(delta, deltaNonPreemptibleUsed) } if utilfeature.DefaultFeatureGate.Enabled(features.ElasticQuotaGuaranteeUsage) { @@ -472,7 +472,7 @@ func (gqm *GroupQuotaManager) buildSubParGroupTopoNoLock() { // ResetAllGroupQuotaNoLock no need to lock gqm.lock func (gqm *GroupQuotaManager) resetAllGroupQuotaNoLock() { - childRequestMap, childUsedMap := make(quotaResMapType), make(quotaResMapType) + childRequestMap, childNonPreemptibleUsedMap, childUsedMap := make(quotaResMapType), make(quotaResMapType), make(quotaResMapType) for quotaName, topoNode := range gqm.quotaTopoNodeMap { if quotaName == extension.RootQuotaName { gqm.resetRootQuotaUsedAndRequest() @@ -481,6 +481,7 @@ func (gqm *GroupQuotaManager) resetAllGroupQuotaNoLock() { topoNode.quotaInfo.lock.Lock() if !topoNode.quotaInfo.IsParent { childRequestMap[quotaName] = topoNode.quotaInfo.CalculateInfo.ChildRequest.DeepCopy() + childNonPreemptibleUsedMap[quotaName] = topoNode.quotaInfo.CalculateInfo.NonPreemptibleUsed.DeepCopy() childUsedMap[quotaName] = topoNode.quotaInfo.CalculateInfo.Used.DeepCopy() } topoNode.quotaInfo.clearForResetNoLock() @@ -500,7 +501,7 @@ func (gqm *GroupQuotaManager) resetAllGroupQuotaNoLock() { for quotaName, topoNode := range gqm.quotaTopoNodeMap { if !topoNode.quotaInfo.IsParent { gqm.updateGroupDeltaRequestNoLock(quotaName, childRequestMap[quotaName]) - gqm.updateGroupDeltaUsedNoLock(quotaName, childUsedMap[quotaName]) + gqm.updateGroupDeltaUsedNoLock(quotaName, childUsedMap[quotaName], childNonPreemptibleUsedMap[quotaName]) } } } @@ -615,28 +616,36 @@ func (gqm *GroupQuotaManager) updatePodUsedNoLock(quotaName string, oldPod, newP return } - var oldPodUsed, newPodUsed v1.ResourceList + var oldPodUsed, newPodUsed, oldNonPreemptibleUsed, newNonPreemptibleUsed v1.ResourceList if oldPod != nil { oldPodUsed, _ = PodRequestsAndLimits(oldPod) + if extension.IsPodNonPreemptible(oldPod) { + oldNonPreemptibleUsed = oldPodUsed + } } else { oldPodUsed = make(v1.ResourceList) + oldNonPreemptibleUsed = make(v1.ResourceList) } if newPod != nil { newPodUsed, _ = PodRequestsAndLimits(newPod) + if extension.IsPodNonPreemptible(newPod) { + newNonPreemptibleUsed = newPodUsed + } } else { newPodUsed = make(v1.ResourceList) + newNonPreemptibleUsed = make(v1.ResourceList) } deltaUsed := quotav1.Subtract(newPodUsed, oldPodUsed) - if quotav1.IsZero(deltaUsed) { + deltaNonPreemptibleUsed := quotav1.Subtract(newNonPreemptibleUsed, oldNonPreemptibleUsed) + if quotav1.IsZero(deltaUsed) && quotav1.IsZero(deltaNonPreemptibleUsed) { if klog.V(5).Enabled() { - klog.Infof("updatePodUsed, deltaUsedIsZero, quotaName: %v, podName: %v, podUsed: %v", - quotaName, getPodName(oldPod, newPod), util.DumpJSON(newPodUsed)) + klog.Infof("updatePodUsed, deltaUsedIsZero and deltaNonPreemptibleUsedIsZero, quotaName: %v, podName: %v, podUsed: %v, podNonPreemptibleUsed: %v", + quotaName, getPodName(oldPod, newPod), util.DumpJSON(newPodUsed), util.DumpJSON(newNonPreemptibleUsed)) } - return } - gqm.updateGroupDeltaUsedNoLock(quotaName, deltaUsed) + gqm.updateGroupDeltaUsedNoLock(quotaName, deltaUsed, deltaNonPreemptibleUsed) } func (gqm *GroupQuotaManager) updatePodCacheNoLock(quotaName string, pod *v1.Pod, isAdd bool) { @@ -878,21 +887,25 @@ func (gqm *GroupQuotaManager) resetRootQuotaUsedAndRequest() { used := v1.ResourceList{} request := v1.ResourceList{} + nonPreemptUsed := v1.ResourceList{} systemQuotaInfo := gqm.getQuotaInfoByNameNoLock(extension.SystemQuotaName) if systemQuotaInfo != nil { used = quotav1.Add(used, systemQuotaInfo.GetUsed()) request = quotav1.Add(request, systemQuotaInfo.GetRequest()) + nonPreemptUsed = quotav1.Add(nonPreemptUsed, systemQuotaInfo.GetNonPreemptibleUsed()) } defaultQuotaInfo := gqm.getQuotaInfoByNameNoLock(extension.DefaultQuotaName) if defaultQuotaInfo != nil { used = quotav1.Add(used, defaultQuotaInfo.GetUsed()) request = quotav1.Add(request, defaultQuotaInfo.GetRequest()) + nonPreemptUsed = quotav1.Add(nonPreemptUsed, defaultQuotaInfo.GetNonPreemptibleUsed()) } rootQuotaInfo.CalculateInfo.Used = used rootQuotaInfo.CalculateInfo.Request = request + rootQuotaInfo.CalculateInfo.NonPreemptibleUsed = nonPreemptUsed } func (gqm *GroupQuotaManager) recursiveUpdateGroupTreeWithDeltaAllocated(deltaAllocated v1.ResourceList, curToAllParInfos []*QuotaInfo) { diff --git a/pkg/scheduler/plugins/elasticquota/core/group_quota_manager_test.go b/pkg/scheduler/plugins/elasticquota/core/group_quota_manager_test.go index 24223d3f5..436ae0f4f 100644 --- a/pkg/scheduler/plugins/elasticquota/core/group_quota_manager_test.go +++ b/pkg/scheduler/plugins/elasticquota/core/group_quota_manager_test.go @@ -430,31 +430,37 @@ func TestGroupQuotaManager_UpdateQuotaRequest(t *testing.T) { assert.Equal(t, createResourceList(43, 80*GigaByte), runtime) } -func TestGroupQuotaManager_UpdateGroupDeltaUsed(t *testing.T) { +func TestGroupQuotaManager_UpdateGroupDeltaUsedAndNonPreemptibleUsed(t *testing.T) { gqm := NewGroupQuotaManagerForTest() AddQuotaToManager(t, gqm, "test1", extension.RootQuotaName, 96, 160*GigaByte, 50, 80*GigaByte, true, false) - // 1. test1 used[120, 290] runtime == maxQuota + // 1. test1 used[120, 290] runtime == maxQuota nonPreemptibleUsed[20, 30] used := createResourceList(120, 290*GigaByte) - gqm.updateGroupDeltaUsedNoLock("test1", used) + nonPreemptibleUsed := createResourceList(20, 30*GigaByte) + gqm.updateGroupDeltaUsedNoLock("test1", used, nonPreemptibleUsed) quotaInfo := gqm.GetQuotaInfoByName("test1") assert.NotNil(t, quotaInfo) assert.Equal(t, used, quotaInfo.CalculateInfo.Used) + assert.Equal(t, nonPreemptibleUsed, quotaInfo.CalculateInfo.NonPreemptibleUsed) // 2. used increases to [130,300] used = createResourceList(10, 10*GigaByte) - gqm.updateGroupDeltaUsedNoLock("test1", used) + nonPreemptibleUsed = createResourceList(10, 10*GigaByte) + gqm.updateGroupDeltaUsedNoLock("test1", used, nonPreemptibleUsed) quotaInfo = gqm.GetQuotaInfoByName("test1") assert.NotNil(t, quotaInfo) assert.Equal(t, createResourceList(130, 300*GigaByte), quotaInfo.CalculateInfo.Used) + assert.Equal(t, createResourceList(30, 40*GigaByte), quotaInfo.CalculateInfo.NonPreemptibleUsed) // 3. used decreases to [90,100] used = createResourceList(-40, -200*GigaByte) - gqm.updateGroupDeltaUsedNoLock("test1", used) + nonPreemptibleUsed = createResourceList(-15, -20*GigaByte) + gqm.updateGroupDeltaUsedNoLock("test1", used, nonPreemptibleUsed) quotaInfo = gqm.GetQuotaInfoByName("test1") assert.NotNil(t, quotaInfo) assert.Equal(t, createResourceList(90, 100*GigaByte), quotaInfo.CalculateInfo.Used) + assert.Equal(t, createResourceList(15, 20*GigaByte), quotaInfo.CalculateInfo.NonPreemptibleUsed) } func TestGroupQuotaManager_MultiQuotaAdd(t *testing.T) { @@ -712,7 +718,7 @@ func TestGroupQuotaManager_MultiUpdateQuotaRequest_WithScaledMinQuota2(t *testin assert.Equal(t, createResourceList2(66666, 200*GigaByte/3), quotaInfo.CalculateInfo.AutoScaleMin) } -func TestGroupQuotaManager_MultiUpdateQuotaUsed(t *testing.T) { +func TestGroupQuotaManager_MultiUpdateQuotaUsedAndNonPreemptibleUsed(t *testing.T) { gqm := NewGroupQuotaManagerForTest() AddQuotaToManager(t, gqm, "test1", extension.RootQuotaName, 96, 160*GigaByte, 50, 80*GigaByte, true, true) @@ -720,18 +726,22 @@ func TestGroupQuotaManager_MultiUpdateQuotaUsed(t *testing.T) { AddQuotaToManager(t, gqm, "test1-sub1-1", "test1-sub1", 96, 160*GigaByte, 50, 80*GigaByte, true, false) used := createResourceList(120, 290*GigaByte) - gqm.updateGroupDeltaUsedNoLock("test1-sub1", used) + nonPreemptibleUsed := createResourceList(50, 100*GigaByte) + gqm.updateGroupDeltaUsedNoLock("test1-sub1", used, nonPreemptibleUsed) quotaInfo := gqm.GetQuotaInfoByName("test1-sub1") assert.True(t, quotaInfo != nil) assert.Equal(t, used, quotaInfo.CalculateInfo.Used) + assert.Equal(t, nonPreemptibleUsed, quotaInfo.CalculateInfo.NonPreemptibleUsed) quotaInfo = gqm.GetQuotaInfoByName("test1-sub1") assert.True(t, quotaInfo != nil) assert.Equal(t, used, quotaInfo.CalculateInfo.Used) + assert.Equal(t, nonPreemptibleUsed, quotaInfo.CalculateInfo.NonPreemptibleUsed) quotaInfo = gqm.GetQuotaInfoByName("test1") assert.True(t, quotaInfo != nil) assert.Equal(t, used, quotaInfo.CalculateInfo.Used) + assert.Equal(t, nonPreemptibleUsed, quotaInfo.CalculateInfo.NonPreemptibleUsed) } func TestGroupQuotaManager_UpdateQuotaParentName(t *testing.T) { @@ -758,7 +768,7 @@ func TestGroupQuotaManager_UpdateQuotaParentName(t *testing.T) { // a-123 request [60,100] request := createResourceList(60, 100*GigaByte) gqm.updateGroupDeltaRequestNoLock("a-123", request) - gqm.updateGroupDeltaUsedNoLock("a-123", request) + gqm.updateGroupDeltaUsedNoLock("a-123", request, createResourceList(0, 0)) runtime := gqm.RefreshRuntime("a-123") assert.Equal(t, request, runtime) @@ -771,7 +781,7 @@ func TestGroupQuotaManager_UpdateQuotaParentName(t *testing.T) { // test2-a request [20,40] request = createResourceList(20, 40*GigaByte) gqm.updateGroupDeltaRequestNoLock("test2-a", request) - gqm.updateGroupDeltaUsedNoLock("test2-a", request) + gqm.updateGroupDeltaUsedNoLock("test2-a", request, createResourceList(0, 0)) runtime = gqm.RefreshRuntime("test2-a") assert.Equal(t, request, runtime) @@ -855,7 +865,7 @@ func TestGroupQuotaManager_UpdateClusterTotalResource(t *testing.T) { assert.Equal(t, totalRes, quotaTotalRes) sysUsed := createResourceList(10, 30*GigaByte) - gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, sysUsed) + gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, sysUsed, createResourceList(0, 0)) assert.Equal(t, sysUsed, gqm.GetQuotaInfoByName(extension.SystemQuotaName).GetUsed()) // 90, 510 @@ -867,21 +877,21 @@ func TestGroupQuotaManager_UpdateClusterTotalResource(t *testing.T) { assert.Equal(t, delta, quotaTotalRes) // 80, 480 - gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, createResourceList(10, 30)) + gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, createResourceList(10, 30), createResourceList(0, 0)) delta = quotav1.Subtract(delta, createResourceList(10, 30)) assert.Equal(t, totalRes, gqm.totalResource) assert.Equal(t, delta, gqm.totalResourceExceptSystemAndDefaultUsed) // 70, 450 defaultUsed := createResourceList(10, 30) - gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed) + gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed, createResourceList(0, 0)) assert.Equal(t, defaultUsed, gqm.GetQuotaInfoByName(extension.DefaultQuotaName).GetUsed()) delta = quotav1.Subtract(delta, defaultUsed) assert.Equal(t, totalRes, gqm.totalResource) assert.Equal(t, delta, gqm.totalResourceExceptSystemAndDefaultUsed) // 60 420 - gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed) + gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed, createResourceList(0, 0)) delta = quotav1.Subtract(delta, defaultUsed) assert.Equal(t, totalRes, gqm.totalResource) assert.Equal(t, delta, gqm.totalResourceExceptSystemAndDefaultUsed) @@ -1428,7 +1438,7 @@ func TestGroupQuotaManager_GetQuotaInformationForSyncHandler(t *testing.T) { gqm.UpdateClusterTotalResource(createResourceList(1000, 1000)) gqm.updateGroupDeltaRequestNoLock("1", createResourceList(100, 100)) gqm.RefreshRuntime("1") - gqm.updateGroupDeltaUsedNoLock("1", createResourceList(10, 10)) + gqm.updateGroupDeltaUsedNoLock("1", createResourceList(10, 10), createResourceList(0, 0)) used, request, childRequest, runtime, _, _, err := gqm.GetQuotaInformationForSyncHandler("1") assert.Nil(t, err) assert.Equal(t, used, createResourceList(10, 10)) @@ -1446,7 +1456,7 @@ func TestGroupQuotaManager_GetQuotaInformationForSyncHandlerWithUsageGuarantee(t gqm.UpdateClusterTotalResource(createResourceList(1000, 1000)) gqm.updateGroupDeltaRequestNoLock("1", createResourceList(100, 100)) gqm.RefreshRuntime("1") - gqm.updateGroupDeltaUsedNoLock("1", createResourceList(10, 10)) + gqm.updateGroupDeltaUsedNoLock("1", createResourceList(10, 10), createResourceList(0, 0)) used, request, childRequest, runtime, guaranteed, allocated, err := gqm.GetQuotaInformationForSyncHandler("1") assert.Nil(t, err) assert.Equal(t, used, createResourceList(10, 10)) @@ -1492,12 +1502,12 @@ func TestGroupQuotaManager_UpdateRootQuotaUsed(t *testing.T) { sysUsed := createResourceList(10, 30) expectedTotalUsed = quotav1.Add(expectedTotalUsed, sysUsed) - gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, sysUsed) + gqm.updateGroupDeltaUsedNoLock(extension.SystemQuotaName, sysUsed, createResourceList(0, 0)) assert.Equal(t, sysUsed, gqm.GetQuotaInfoByName(extension.SystemQuotaName).GetUsed()) defaultUsed := createResourceList(2, 5) expectedTotalUsed = quotav1.Add(expectedTotalUsed, defaultUsed) - gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed) + gqm.updateGroupDeltaUsedNoLock(extension.DefaultQuotaName, defaultUsed, createResourceList(0, 0)) assert.Equal(t, defaultUsed, gqm.GetQuotaInfoByName(extension.DefaultQuotaName).GetUsed()) //case1: no quota, root quota used @@ -1530,8 +1540,8 @@ func TestGroupQuotaManager_UpdateRootQuotaUsed(t *testing.T) { gqm.UpdateQuota(qi1, false) gqm.UpdateQuota(qi2, false) gqm.UpdateQuota(qi3, false) - gqm.updateGroupDeltaUsedNoLock("2", createResourceList(5, 5)) - gqm.updateGroupDeltaUsedNoLock("3", createResourceList(7, 5)) + gqm.updateGroupDeltaUsedNoLock("2", createResourceList(5, 5), createResourceList(0, 0)) + gqm.updateGroupDeltaUsedNoLock("3", createResourceList(7, 5), createResourceList(0, 0)) expectedTotalUsed = quotav1.Add(expectedTotalUsed, createResourceList(5, 5)) expectedTotalUsed = quotav1.Add(expectedTotalUsed, createResourceList(7, 5)) diff --git a/pkg/scheduler/plugins/elasticquota/core/quota_info.go b/pkg/scheduler/plugins/elasticquota/core/quota_info.go index 8fd765f22..db616583c 100644 --- a/pkg/scheduler/plugins/elasticquota/core/quota_info.go +++ b/pkg/scheduler/plugins/elasticquota/core/quota_info.go @@ -41,8 +41,9 @@ type QuotaCalculateInfo struct { AutoScaleMin v1.ResourceList // All assigned pods used Used v1.ResourceList - // If the quota is allow to lent resource, it's equal to childRequest - // else it's the max of min and childRequest + // All non-preemptible pods used + NonPreemptibleUsed v1.ResourceList + // All pods request Request v1.ResourceList // ChildRquest is the sum of child quota requests. // If the quota is leaf, it's the sum of pods requests @@ -85,16 +86,17 @@ func NewQuotaInfo(isParent, allowLentResource bool, name, parentName string) *Qu RuntimeVersion: 0, PodCache: make(map[string]*PodInfo), CalculateInfo: QuotaCalculateInfo{ - Max: v1.ResourceList{}, - AutoScaleMin: v1.ResourceList{}, - Min: v1.ResourceList{}, - Used: v1.ResourceList{}, - Request: v1.ResourceList{}, - SharedWeight: v1.ResourceList{}, - Runtime: v1.ResourceList{}, - ChildRequest: v1.ResourceList{}, - Guaranteed: v1.ResourceList{}, - Allocated: v1.ResourceList{}, + Max: v1.ResourceList{}, + AutoScaleMin: v1.ResourceList{}, + Min: v1.ResourceList{}, + Used: v1.ResourceList{}, + NonPreemptibleUsed: v1.ResourceList{}, + Request: v1.ResourceList{}, + SharedWeight: v1.ResourceList{}, + Runtime: v1.ResourceList{}, + ChildRequest: v1.ResourceList{}, + Guaranteed: v1.ResourceList{}, + Allocated: v1.ResourceList{}, }, } } @@ -114,16 +116,17 @@ func (qi *QuotaInfo) DeepCopy() *QuotaInfo { RuntimeVersion: qi.RuntimeVersion, PodCache: make(map[string]*PodInfo), CalculateInfo: QuotaCalculateInfo{ - Max: qi.CalculateInfo.Max.DeepCopy(), - AutoScaleMin: qi.CalculateInfo.AutoScaleMin.DeepCopy(), - Min: qi.CalculateInfo.Min.DeepCopy(), - Used: qi.CalculateInfo.Used.DeepCopy(), - Request: qi.CalculateInfo.Request.DeepCopy(), - SharedWeight: qi.CalculateInfo.SharedWeight.DeepCopy(), - Runtime: qi.CalculateInfo.Runtime.DeepCopy(), - ChildRequest: qi.CalculateInfo.ChildRequest.DeepCopy(), - Guaranteed: qi.CalculateInfo.Guaranteed.DeepCopy(), - Allocated: qi.CalculateInfo.Allocated.DeepCopy(), + Max: qi.CalculateInfo.Max.DeepCopy(), + AutoScaleMin: qi.CalculateInfo.AutoScaleMin.DeepCopy(), + Min: qi.CalculateInfo.Min.DeepCopy(), + Used: qi.CalculateInfo.Used.DeepCopy(), + NonPreemptibleUsed: qi.CalculateInfo.NonPreemptibleUsed.DeepCopy(), + Request: qi.CalculateInfo.Request.DeepCopy(), + SharedWeight: qi.CalculateInfo.SharedWeight.DeepCopy(), + Runtime: qi.CalculateInfo.Runtime.DeepCopy(), + ChildRequest: qi.CalculateInfo.ChildRequest.DeepCopy(), + Guaranteed: qi.CalculateInfo.Guaranteed.DeepCopy(), + Allocated: qi.CalculateInfo.Allocated.DeepCopy(), }, } for name, pod := range qi.PodCache { @@ -146,6 +149,7 @@ func (qi *QuotaInfo) GetQuotaSummary() *QuotaInfoSummary { quotaInfoSummary.Min = qi.CalculateInfo.Min.DeepCopy() quotaInfoSummary.AutoScaleMin = qi.CalculateInfo.AutoScaleMin.DeepCopy() quotaInfoSummary.Used = qi.CalculateInfo.Used.DeepCopy() + quotaInfoSummary.NonPreemptibleUsed = qi.CalculateInfo.NonPreemptibleUsed.DeepCopy() quotaInfoSummary.Request = qi.CalculateInfo.Request.DeepCopy() quotaInfoSummary.SharedWeight = qi.CalculateInfo.SharedWeight.DeepCopy() quotaInfoSummary.Runtime = qi.CalculateInfo.Runtime.DeepCopy() @@ -231,11 +235,15 @@ func (qi *QuotaInfo) GetAllocated() v1.ResourceList { return qi.CalculateInfo.Allocated.DeepCopy() } -func (qi *QuotaInfo) addUsedNonNegativeNoLock(delta v1.ResourceList) { +func (qi *QuotaInfo) addUsedNonNegativeNoLock(delta, deltaNonPreemptibleUsed v1.ResourceList) { qi.CalculateInfo.Used = quotav1.Add(qi.CalculateInfo.Used, delta) + qi.CalculateInfo.NonPreemptibleUsed = quotav1.Add(qi.CalculateInfo.NonPreemptibleUsed, deltaNonPreemptibleUsed) for _, resName := range quotav1.IsNegative(qi.CalculateInfo.Used) { qi.CalculateInfo.Used[resName] = createQuantity(0, resName) } + for _, resName := range quotav1.IsNegative(qi.CalculateInfo.NonPreemptibleUsed) { + qi.CalculateInfo.NonPreemptibleUsed[resName] = createQuantity(0, resName) + } } func (qi *QuotaInfo) addAllocatedQuotaNoLock(delta v1.ResourceList) { @@ -279,6 +287,12 @@ func (qi *QuotaInfo) GetUsed() v1.ResourceList { return qi.CalculateInfo.Used.DeepCopy() } +func (qi *QuotaInfo) GetNonPreemptibleUsed() v1.ResourceList { + qi.lock.Lock() + defer qi.lock.Unlock() + return qi.CalculateInfo.NonPreemptibleUsed.DeepCopy() +} + func (qi *QuotaInfo) GetRuntime() v1.ResourceList { qi.lock.Lock() defer qi.lock.Unlock() @@ -322,6 +336,7 @@ func (qi *QuotaInfo) getMaskedRuntimeNoLock() v1.ResourceList { func (qi *QuotaInfo) clearForResetNoLock() { qi.CalculateInfo.Request = v1.ResourceList{} qi.CalculateInfo.Used = v1.ResourceList{} + qi.CalculateInfo.NonPreemptibleUsed = v1.ResourceList{} qi.CalculateInfo.Runtime = v1.ResourceList{} qi.CalculateInfo.ChildRequest = v1.ResourceList{} qi.CalculateInfo.Guaranteed = v1.ResourceList{} diff --git a/pkg/scheduler/plugins/elasticquota/core/quota_summary.go b/pkg/scheduler/plugins/elasticquota/core/quota_summary.go index fdd7e5989..e80a66cbd 100644 --- a/pkg/scheduler/plugins/elasticquota/core/quota_summary.go +++ b/pkg/scheduler/plugins/elasticquota/core/quota_summary.go @@ -32,14 +32,15 @@ type QuotaInfoSummary struct { RuntimeVersion int64 `json:"runtimeVersion"` AllowLentResource bool `json:"allowLentResource"` - Max v1.ResourceList `json:"max"` - Min v1.ResourceList `json:"min"` - AutoScaleMin v1.ResourceList `json:"autoScaleMin"` - Used v1.ResourceList `json:"used"` - Request v1.ResourceList `json:"request"` - SharedWeight v1.ResourceList `json:"sharedWeight"` - Runtime v1.ResourceList `json:"runtime"` - ChildRequest v1.ResourceList `json:"childRequest"` + Max v1.ResourceList `json:"max"` + Min v1.ResourceList `json:"min"` + AutoScaleMin v1.ResourceList `json:"autoScaleMin"` + Used v1.ResourceList `json:"used"` + NonPreemptibleUsed v1.ResourceList `json:"nonPreemptibleUsed"` + Request v1.ResourceList `json:"request"` + SharedWeight v1.ResourceList `json:"sharedWeight"` + Runtime v1.ResourceList `json:"runtime"` + ChildRequest v1.ResourceList `json:"childRequest"` PodCache map[string]*SimplePodInfo `json:"podCache"` } diff --git a/pkg/scheduler/plugins/elasticquota/core/runtime_quota_calculator_test.go b/pkg/scheduler/plugins/elasticquota/core/runtime_quota_calculator_test.go index 189eda650..7e8a6d305 100644 --- a/pkg/scheduler/plugins/elasticquota/core/runtime_quota_calculator_test.go +++ b/pkg/scheduler/plugins/elasticquota/core/runtime_quota_calculator_test.go @@ -58,7 +58,7 @@ func TestQuotaInfo_AddRequestNonNegativeNoLock(t *testing.T) { }, } quotaInfo.addRequestNonNegativeNoLock(req1) - quotaInfo.addUsedNonNegativeNoLock(req1) + quotaInfo.addUsedNonNegativeNoLock(req1, createResourceList(0, 0)) assert.Equal(t, quotaInfo.CalculateInfo.Request, createResourceList(0, 0)) assert.Equal(t, quotaInfo.CalculateInfo.Used, createResourceList(0, 0)) } diff --git a/pkg/scheduler/plugins/elasticquota/plugin.go b/pkg/scheduler/plugins/elasticquota/plugin.go index 7e0794aed..8bf0faa5c 100644 --- a/pkg/scheduler/plugins/elasticquota/plugin.go +++ b/pkg/scheduler/plugins/elasticquota/plugin.go @@ -56,17 +56,19 @@ const ( ) type PostFilterState struct { - skip bool - quotaInfo *core.QuotaInfo - used corev1.ResourceList - runtime corev1.ResourceList + skip bool + quotaInfo *core.QuotaInfo + used corev1.ResourceList + nonPreemptibleUsed corev1.ResourceList + runtime corev1.ResourceList } func (p *PostFilterState) Clone() framework.StateData { return &PostFilterState{ - quotaInfo: p.quotaInfo, - used: p.used.DeepCopy(), - runtime: p.runtime.DeepCopy(), + quotaInfo: p.quotaInfo, + used: p.used.DeepCopy(), + nonPreemptibleUsed: p.nonPreemptibleUsed.DeepCopy(), + runtime: p.runtime.DeepCopy(), } } @@ -233,6 +235,18 @@ func (g *Plugin) PreFilter(ctx context.Context, cycleState *framework.CycleState quotaName, printResourceList(state.runtime), printResourceList(state.used), printResourceList(podRequest), exceedDimensions)) } + if extension.IsPodNonPreemptible(pod) { + quotaMin := state.quotaInfo.CalculateInfo.Min + nonPreemptibleUsed := state.nonPreemptibleUsed + addNonPreemptibleUsed := quotav1.Add(podRequest, nonPreemptibleUsed) + + if isLessEqual, exceedDimensions := quotav1.LessThanOrEqual(addNonPreemptibleUsed, quotaMin); !isLessEqual { + return nil, framework.NewStatus(framework.Unschedulable, fmt.Sprintf("Insufficient non-preemptible quotas, "+ + "quotaName: %v, min: %v, nonPreemptibleUsed: %v, pod's request: %v, exceedDimensions: %v", + quotaName, printResourceList(quotaMin), printResourceList(nonPreemptibleUsed), printResourceList(podRequest), exceedDimensions)) + } + } + if *g.pluginArgs.EnableCheckParentQuota { return nil, g.checkQuotaRecursive(quotaName, []string{quotaName}, podRequest) } diff --git a/pkg/scheduler/plugins/elasticquota/plugin_helper.go b/pkg/scheduler/plugins/elasticquota/plugin_helper.go index d1ec478a8..baa690a36 100644 --- a/pkg/scheduler/plugins/elasticquota/plugin_helper.go +++ b/pkg/scheduler/plugins/elasticquota/plugin_helper.go @@ -241,9 +241,10 @@ func (g *Plugin) createRootQuotaIfNotPresent() { func (g *Plugin) snapshotPostFilterState(quotaInfo *core.QuotaInfo, state *framework.CycleState) *PostFilterState { postFilterState := &PostFilterState{ - quotaInfo: quotaInfo, - used: quotaInfo.GetUsed(), - runtime: quotaInfo.GetRuntime(), + quotaInfo: quotaInfo, + used: quotaInfo.GetUsed(), + nonPreemptibleUsed: quotaInfo.GetNonPreemptibleUsed(), + runtime: quotaInfo.GetRuntime(), } state.Write(postFilterKey, postFilterState) return postFilterState diff --git a/pkg/scheduler/plugins/elasticquota/plugin_test.go b/pkg/scheduler/plugins/elasticquota/plugin_test.go index ec274f0b0..add91d667 100644 --- a/pkg/scheduler/plugins/elasticquota/plugin_test.go +++ b/pkg/scheduler/plugins/elasticquota/plugin_test.go @@ -750,6 +750,114 @@ func TestPlugin_PreFilter_CheckParent(t *testing.T) { } } +func TestPlugin_Prefilter_QuotaNonPreempt(t *testing.T) { + test := []struct { + name string + pod *corev1.Pod + initPods []*corev1.Pod + quotaInfos []*v1alpha1.ElasticQuota + totalResource corev1.ResourceList + expectedStatus *framework.Status + }{ + { + name: "default", + pod: defaultCreatePodWithQuotaAndNonPreemptible("4", "test1", 1, 2, 2, true), + initPods: []*corev1.Pod{ + defaultCreatePodWithQuotaAndNonPreemptible("1", "test1", 10, 2, 1, false), + defaultCreatePodWithQuotaAndNonPreemptible("2", "test1", 9, 1, 1, false), + defaultCreatePodWithQuotaAndNonPreemptible("3", "test1", 8, 1, 1, false), + }, + quotaInfos: []*v1alpha1.ElasticQuota{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test1", + }, + Spec: v1alpha1.ElasticQuotaSpec{ + Max: MakeResourceList().CPU(10).Mem(10).Obj(), + Min: MakeResourceList().CPU(5).Mem(5).Obj(), + }, + }, + }, + totalResource: createResourceList(10, 10), + expectedStatus: framework.NewStatus(framework.Success, ""), + }, + { + name: "non-preemptible pod used larger than min", + pod: defaultCreatePodWithQuotaAndNonPreemptible("4", "test1", 1, 2, 2, true), + initPods: []*corev1.Pod{ + defaultCreatePodWithQuotaAndNonPreemptible("1", "test1", 10, 2, 1, false), + defaultCreatePodWithQuotaAndNonPreemptible("2", "test1", 9, 2, 1, true), + defaultCreatePodWithQuotaAndNonPreemptible("3", "test1", 9, 2, 1, true), + }, + quotaInfos: []*v1alpha1.ElasticQuota{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test1", + }, + Spec: v1alpha1.ElasticQuotaSpec{ + Max: MakeResourceList().CPU(10).Mem(8).Obj(), + Min: MakeResourceList().CPU(5).Mem(5).Obj(), + }, + }, + }, + totalResource: createResourceList(8, 5), + expectedStatus: framework.NewStatus(framework.Unschedulable, + fmt.Sprintf("Insufficient non-preemptible quotas, "+ + "quotaName: %v, min: %v, nonPreemptibleUsed: %v, pod's request: %v, exceedDimensions: [cpu]", + "test1", printResourceList(MakeResourceList().CPU(5).Mem(5).Obj()), + printResourceList(MakeResourceList().CPU(4).Mem(2).Obj()), printResourceList(MakeResourceList().CPU(2).Mem(2).Obj()))), + }, + { + name: "non-preemptible pod will not be evicted", + pod: defaultCreatePodWithQuotaAndNonPreemptible("4", "test1", 10, 2, 1, true), + initPods: []*corev1.Pod{ + defaultCreatePodWithQuotaAndNonPreemptible("1", "test1", 10, 2, 1, false), + defaultCreatePodWithQuotaAndNonPreemptible("2", "test1", 4, 2, 1, false), + defaultCreatePodWithQuotaAndNonPreemptible("3", "test1", 1, 2, 2, true), + }, + quotaInfos: []*v1alpha1.ElasticQuota{ + { + ObjectMeta: metav1.ObjectMeta{ + Name: "test1", + }, + Spec: v1alpha1.ElasticQuotaSpec{ + Max: MakeResourceList().CPU(10).Mem(8).Obj(), + Min: MakeResourceList().CPU(5).Mem(5).Obj(), + }, + }, + }, + totalResource: createResourceList(7, 5), + expectedStatus: framework.NewStatus(framework.Unschedulable, + fmt.Sprintf("Insufficient quotas, "+ + "quotaName: %v, runtime: %v, used: %v, pod's request: %v, exceedDimensions: [cpu]", + "test1", printResourceList(MakeResourceList().CPU(7).Mem(5).Obj()), + printResourceList(MakeResourceList().CPU(6).Mem(4).Obj()), printResourceList(MakeResourceList().CPU(2).Mem(1).Obj()))), + }, + } + for _, tt := range test { + t.Run(tt.name, func(t *testing.T) { + suit := newPluginTestSuit(t, nil) + p, _ := suit.proxyNew(suit.elasticQuotaArgs, suit.Handle) + gp := p.(*Plugin) + gp.groupQuotaManager.UpdateClusterTotalResource(tt.totalResource) + for _, qis := range tt.quotaInfos { + gp.OnQuotaAdd(qis) + } + + for _, pod := range tt.initPods { + gp.OnPodAdd(pod) + } + tt.pod.Spec.NodeName = "" + gp.OnPodAdd(tt.pod) + + state := framework.NewCycleState() + ctx := context.TODO() + _, status := gp.PreFilter(ctx, state, tt.pod) + assert.Equal(t, status, tt.expectedStatus) + }) + } +} + func TestPlugin_Reserve(t *testing.T) { test := []struct { name string @@ -1168,6 +1276,16 @@ func defaultCreatePodWithQuotaName(name, quotaName string, priority int32, cpu, return pod } +func defaultCreatePodWithQuotaAndNonPreemptible(name, quotaName string, priority int32, cpu, mem int64, nonPreempt bool) *corev1.Pod { + pod := defaultCreatePod(name, priority, cpu, mem) + pod.Labels[extension.LabelQuotaName] = quotaName + if nonPreempt { + pod.Labels[extension.LabelPreemptible] = "false" + } + pod.UID = types.UID(name) + return pod +} + func defaultCreatePod(name string, priority int32, cpu, mem int64) *corev1.Pod { pod := &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{ diff --git a/pkg/scheduler/plugins/elasticquota/preempt.go b/pkg/scheduler/plugins/elasticquota/preempt.go index 583a44aea..f065a37df 100644 --- a/pkg/scheduler/plugins/elasticquota/preempt.go +++ b/pkg/scheduler/plugins/elasticquota/preempt.go @@ -36,6 +36,7 @@ import ( "k8s.io/kubernetes/pkg/scheduler/framework/preemption" "k8s.io/kubernetes/pkg/scheduler/util" + "github.com/koordinator-sh/koordinator/apis/extension" "github.com/koordinator-sh/koordinator/pkg/scheduler/plugins/elasticquota/core" ) @@ -280,6 +281,9 @@ func getPDBLister(handle framework.Handle) policylisters.PodDisruptionBudgetList } func (g *Plugin) canPreempt(pod, victim *corev1.Pod) bool { + if extension.IsPodNonPreemptible(victim) { + return false + } podPri := corev1helpers.PodPriority(pod) vicPri := corev1helpers.PodPriority(victim) diff --git a/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke.go b/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke.go index e98ec8b8f..6716a9292 100644 --- a/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke.go +++ b/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke.go @@ -111,6 +111,9 @@ func (monitor *QuotaOverUsedGroupMonitor) getToRevokePodList(quotaName string) [ if shouldBreak, _ := quotav1.LessThanOrEqual(used, runtime); shouldBreak { break } + if extension.IsPodNonPreemptible(pod) { + continue + } podReq, _ := core.PodRequestsAndLimits(pod) used = quotav1.Subtract(used, podReq) tryAssignBackPodCache = append(tryAssignBackPodCache, pod) diff --git a/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke_test.go b/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke_test.go index 25512bce6..eac0c52c9 100644 --- a/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke_test.go +++ b/pkg/scheduler/plugins/elasticquota/quota_overuse_revoke_test.go @@ -107,10 +107,11 @@ func TestQuotaOverUsedRevokeController_GetToRevokePodList(t *testing.T) { con := NewQuotaOverUsedRevokeController(plugin) con.syncQuota() quotaInfo := gqm.GetQuotaInfoByName("test1") - pod1 := defaultCreatePod("1", 10, 30, 0) + pod1 := defaultCreatePod("1", 10, 20, 0) pod2 := defaultCreatePod("2", 9, 10, 1) pod3 := defaultCreatePod("3", 8, 20, 0) pod4 := defaultCreatePod("4", 7, 40, 0) + defaultCreatePodWithQuotaAndNonPreemptible("5", "test1", 1, 10, 0, true) gqm.OnPodAdd("test1", pod1) gqm.OnPodAdd("test1", pod2) gqm.OnPodAdd("test1", pod3) @@ -120,7 +121,7 @@ func TestQuotaOverUsedRevokeController_GetToRevokePodList(t *testing.T) { if len(result) != 2 { t.Errorf("error:%v", len(result)) } - if result[0].Name != "2" || result[1].Name != "4" { + if result[0].Name == "5" || result[0].Name != "2" || result[1].Name != "4" { t.Errorf("error") } qi.Lock() diff --git a/pkg/webhook/elasticquota/pod_check.go b/pkg/webhook/elasticquota/pod_check.go index d91336c79..7c6362436 100644 --- a/pkg/webhook/elasticquota/pod_check.go +++ b/pkg/webhook/elasticquota/pod_check.go @@ -54,6 +54,9 @@ func (qt *quotaTopology) ValidateAddPod(pod *corev1.Pod) error { } func (qt *quotaTopology) ValidateUpdatePod(oldPod, newPod *corev1.Pod) error { + if oldPod.Labels[extension.LabelPreemptible] != newPod.Labels[extension.LabelPreemptible] { + return fmt.Errorf("Preemptible label is forbidden modify now.") + } return qt.ValidateAddPod(newPod) }