Resource quota enforcement webhook (#544)
* Cert configuration and reloading
* Add support for strict webhook error handling
* Improve webhook error handling
* Don't deregister the webhook when failure policy is strict
* Standardize error message capitalization
* Have the webhook parse its own configuration from flags
* Clean up cert provider code
* Add explanation for skipping deregistration
* Resource Quota enforcement webhook
* Fix bad merge
* Cleanup, fixes
* Cleanup
* Document the quota enforcer
1 parent dc27045 · commit edcf4cd
Showing 10 changed files with 919 additions and 79 deletions.
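The enforcer's Admit* methods (below) return a human-readable denial reason, or an empty string to admit. As a minimal sketch of how the webhook side might plausibly translate that result into an admission response — the helper name and its deny-on-error policy are illustrative assumptions, not code from this commit:

package webhook

import (
	admissionv1beta1 "k8s.io/api/admission/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// quotaCheckResponse (hypothetical) maps the enforcer's (reason, err) result
// onto an AdmissionResponse: an error and a non-empty reason both deny the request.
func quotaCheckResponse(reason string, err error) *admissionv1beta1.AdmissionResponse {
	if err != nil {
		return &admissionv1beta1.AdmissionResponse{
			Allowed: false,
			Result:  &metav1.Status{Message: err.Error()},
		}
	}
	if reason != "" {
		// Reason strings come from admitResource, e.g. "...requests too many cores...".
		return &admissionv1beta1.AdmissionResponse{
			Allowed: false,
			Result:  &metav1.Status{Message: reason},
		}
	}
	return &admissionv1beta1.AdmissionResponse{Allowed: true}
}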
@@ -0,0 +1,97 @@
package resourceusage

import (
	"fmt"

	so "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"
	crdinformers "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/client/informers/externalversions"
	"github.com/golang/glog"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/labels"
	"k8s.io/client-go/informers"
	corev1informers "k8s.io/client-go/informers/core/v1"
	"k8s.io/client-go/tools/cache"
)

type ResourceQuotaEnforcer struct {
	watcher               ResourceUsageWatcher
	resourceQuotaInformer corev1informers.ResourceQuotaInformer
}

func NewResourceQuotaEnforcer(crdInformerFactory crdinformers.SharedInformerFactory, coreV1InformerFactory informers.SharedInformerFactory) ResourceQuotaEnforcer {
	resourceUsageWatcher := newResourceUsageWatcher(crdInformerFactory, coreV1InformerFactory)
	// Registering an (empty) event handler ensures the ResourceQuota informer is
	// instantiated and started when the shared factory starts.
	informer := coreV1InformerFactory.Core().V1().ResourceQuotas()
	informer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{})
	return ResourceQuotaEnforcer{
		watcher:               resourceUsageWatcher,
		resourceQuotaInformer: informer,
	}
}

// TODO: There appears to be a deadlock in cache.WaitForCacheSync. Possibly related: https://github.com/kubernetes/kubernetes/issues/71450
// For now, return immediately. There will be a short window after startup where quota calculation is incorrect.
func (r ResourceQuotaEnforcer) WaitForCacheSync(stopCh <-chan struct{}) error {
	/*if !cache.WaitForCacheSync(stopCh, func() bool {
		return r.resourceQuotaInformer.Informer().HasSynced()
	}) {
		return fmt.Errorf("cache sync canceled")
	}*/
	return nil
}

func (r *ResourceQuotaEnforcer) admitResource(kind, namespace, name string, requestedResources ResourceList) (string, error) {
	glog.V(2).Infof("Processing admission request for %s %s/%s, requesting: %s", kind, namespace, name, requestedResources)
	resourceQuotas, err := r.resourceQuotaInformer.Lister().ResourceQuotas(namespace).List(labels.Everything())
	if err != nil {
		return "", err
	}
	if (requestedResources.cpu.IsZero() && requestedResources.memory.IsZero()) || len(resourceQuotas) == 0 {
		return "", nil
	}

	currentNamespaceUsage, currentApplicationUsage := r.watcher.GetCurrentResourceUsageWithApplication(namespace, kind, name)

	for _, quota := range resourceQuotas {
		// Scope selectors are not currently supported; ignore any ResourceQuota that does not match everything.
		if quota.Spec.ScopeSelector != nil || len(quota.Spec.Scopes) > 0 {
			continue
		}

		// If an existing application has increased its usage, check it against the quota again. If its usage hasn't increased, always allow it.
		if requestedResources.cpu.Cmp(currentApplicationUsage.cpu) == 1 {
			if cpuLimit, present := quota.Spec.Hard[corev1.ResourceCPU]; present {
				// Headroom under this quota: the hard limit minus the namespace's current usage.
				availableCpu := cpuLimit
				availableCpu.Sub(currentNamespaceUsage.cpu)
				if requestedResources.cpu.Cmp(availableCpu) == 1 {
					return fmt.Sprintf("%s %s/%s requests too many cores (%.3f cores requested, %.3f available).", kind, namespace, name, float64(requestedResources.cpu.MilliValue())/1000.0, float64(availableCpu.MilliValue())/1000.0), nil
				}
			}
		}

		if requestedResources.memory.Cmp(currentApplicationUsage.memory) == 1 {
			if memoryLimit, present := quota.Spec.Hard[corev1.ResourceMemory]; present {
				availableMemory := memoryLimit
				availableMemory.Sub(currentNamespaceUsage.memory)
				if requestedResources.memory.Cmp(availableMemory) == 1 {
					return fmt.Sprintf("%s %s/%s requests too much memory (%dMi requested, %dMi available).", kind, namespace, name, requestedResources.memory.Value()/(1<<20), availableMemory.Value()/(1<<20)), nil
				}
			}
		}
	}
	return "", nil
}

func (r *ResourceQuotaEnforcer) AdmitSparkApplication(app so.SparkApplication) (string, error) {
	resourceUsage, err := sparkApplicationResourceUsage(app)
	if err != nil {
		return "", err
	}
	return r.admitResource(KindSparkApplication, app.ObjectMeta.Namespace, app.ObjectMeta.Name, resourceUsage)
}

func (r *ResourceQuotaEnforcer) AdmitScheduledSparkApplication(app so.ScheduledSparkApplication) (string, error) {
	resourceUsage, err := scheduledSparkApplicationResourceUsage(app)
	if err != nil {
		return "", err
	}
	return r.admitResource(KindScheduledSparkApplication, app.ObjectMeta.Namespace, app.ObjectMeta.Name, resourceUsage)
}
@@ -0,0 +1,119 @@
package resourceusage

import (
	so "github.com/GoogleCloudPlatform/spark-on-k8s-operator/pkg/apis/sparkoperator.k8s.io/v1beta1"

	"github.com/golang/glog"
	corev1 "k8s.io/api/core/v1"
	"k8s.io/client-go/tools/cache"
)

func (r *ResourceUsageWatcher) onPodAdded(obj interface{}) {
	pod := obj.(*corev1.Pod)
	// A pod launched by the Spark operator will already be accounted for by the CRD informer callback.
	if !launchedBySparkOperator(pod.ObjectMeta) {
		r.setResources("Pod", namespaceOrDefault(pod.ObjectMeta), pod.ObjectMeta.Name, podResourceUsage(pod), r.usageByNamespacePod)
	}
}

func (r *ResourceUsageWatcher) onPodUpdated(oldObj, newObj interface{}) {
	newPod := newObj.(*corev1.Pod)
	if !launchedBySparkOperator(newPod.ObjectMeta) {
		// Terminated pods no longer count against quota; anything else is re-counted.
		if newPod.Status.Phase == corev1.PodFailed || newPod.Status.Phase == corev1.PodSucceeded {
			r.deleteResources("Pod", namespaceOrDefault(newPod.ObjectMeta), newPod.ObjectMeta.Name, r.usageByNamespacePod)
		} else {
			r.setResources("Pod", namespaceOrDefault(newPod.ObjectMeta), newPod.ObjectMeta.Name, podResourceUsage(newPod), r.usageByNamespacePod)
		}
	}
}

func (r *ResourceUsageWatcher) onPodDeleted(obj interface{}) {
	var pod *corev1.Pod
	switch o := obj.(type) {
	case *corev1.Pod:
		pod = o
	case cache.DeletedFinalStateUnknown:
		// The watch may have missed the delete event; recover the last known state.
		pod = o.Obj.(*corev1.Pod)
	default:
		return
	}
	if !launchedBySparkOperator(pod.ObjectMeta) {
		r.deleteResources("Pod", namespaceOrDefault(pod.ObjectMeta), pod.ObjectMeta.Name, r.usageByNamespacePod)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationAdded(obj interface{}) {
	app := obj.(*so.SparkApplication)
	namespace := namespaceOrDefault(app.ObjectMeta)
	resources, err := sparkApplicationResourceUsage(*app)
	if err != nil {
		glog.Errorf("failed to determine resource usage of SparkApplication %s/%s: %v", namespace, app.ObjectMeta.Name, err)
	} else {
		r.setResources(KindSparkApplication, namespace, app.ObjectMeta.Name, resources, r.usageByNamespaceApplication)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationUpdated(oldObj, newObj interface{}) {
	oldApp := oldObj.(*so.SparkApplication)
	newApp := newObj.(*so.SparkApplication)
	// Skip periodic resyncs, which surface as updates with an unchanged ResourceVersion.
	if oldApp.ResourceVersion == newApp.ResourceVersion {
		return
	}
	namespace := namespaceOrDefault(newApp.ObjectMeta)
	newResources, err := sparkApplicationResourceUsage(*newApp)
	if err != nil {
		glog.Errorf("failed to determine resource usage of SparkApplication %s/%s: %v", namespace, newApp.ObjectMeta.Name, err)
	} else {
		r.setResources(KindSparkApplication, namespace, newApp.ObjectMeta.Name, newResources, r.usageByNamespaceApplication)
	}
}

func (r *ResourceUsageWatcher) onSparkApplicationDeleted(obj interface{}) {
	var app *so.SparkApplication
	switch o := obj.(type) {
	case *so.SparkApplication:
		app = o
	case cache.DeletedFinalStateUnknown:
		app = o.Obj.(*so.SparkApplication)
	default:
		return
	}
	namespace := namespaceOrDefault(app.ObjectMeta)
	r.deleteResources(KindSparkApplication, namespace, app.ObjectMeta.Name, r.usageByNamespaceApplication)
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationAdded(obj interface{}) {
	app := obj.(*so.ScheduledSparkApplication)
	namespace := namespaceOrDefault(app.ObjectMeta)
	resources, err := scheduledSparkApplicationResourceUsage(*app)
	if err != nil {
		glog.Errorf("failed to determine resource usage of ScheduledSparkApplication %s/%s: %v", namespace, app.ObjectMeta.Name, err)
	} else {
		r.setResources(KindScheduledSparkApplication, namespace, app.ObjectMeta.Name, resources, r.usageByNamespaceScheduledApplication)
	}
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationUpdated(oldObj, newObj interface{}) {
	newApp := newObj.(*so.ScheduledSparkApplication)
	namespace := namespaceOrDefault(newApp.ObjectMeta)
	newResources, err := scheduledSparkApplicationResourceUsage(*newApp)
	if err != nil {
		glog.Errorf("failed to determine resource usage of ScheduledSparkApplication %s/%s: %v", namespace, newApp.ObjectMeta.Name, err)
	} else {
		// Record under the ScheduledSparkApplication kind so the entry matches the one removed by the delete handler.
		r.setResources(KindScheduledSparkApplication, namespace, newApp.ObjectMeta.Name, newResources, r.usageByNamespaceScheduledApplication)
	}
}

func (r *ResourceUsageWatcher) onScheduledSparkApplicationDeleted(obj interface{}) {
	var app *so.ScheduledSparkApplication
	switch o := obj.(type) {
	case *so.ScheduledSparkApplication:
		app = o
	case cache.DeletedFinalStateUnknown:
		app = o.Obj.(*so.ScheduledSparkApplication)
	default:
		return
	}
	namespace := namespaceOrDefault(app.ObjectMeta)
	r.deleteResources(KindScheduledSparkApplication, namespace, app.ObjectMeta.Name, r.usageByNamespaceScheduledApplication)
}
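
The callbacks above have the shape client-go's cache.ResourceEventHandlerFuncs expects. A sketch of how newResourceUsageWatcher (not part of this diff) would plausibly register them — the generated accessor chain Sparkoperator().V1beta1() is an assumption based on the operator's code-generation conventions:

// Hypothetical registration inside newResourceUsageWatcher; accessor names are
// assumptions following standard client-go and code-generator layouts.
coreV1InformerFactory.Core().V1().Pods().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
	AddFunc:    r.onPodAdded,
	UpdateFunc: r.onPodUpdated,
	DeleteFunc: r.onPodDeleted,
})
crdInformerFactory.Sparkoperator().V1beta1().SparkApplications().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
	AddFunc:    r.onSparkApplicationAdded,
	UpdateFunc: r.onSparkApplicationUpdated,
	DeleteFunc: r.onSparkApplicationDeleted,
})
crdInformerFactory.Sparkoperator().V1beta1().ScheduledSparkApplications().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
	AddFunc:    r.onScheduledSparkApplicationAdded,
	UpdateFunc: r.onScheduledSparkApplicationUpdated,
	DeleteFunc: r.onScheduledSparkApplicationDeleted,
})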