From 00ae75c614be8531149f3fa438f5707d7e0a5aa5 Mon Sep 17 00:00:00 2001 From: Giulio Frasca Date: Mon, 24 Jun 2024 17:01:56 -0400 Subject: [PATCH 1/2] Add ResourceRequirements to WorkflowController - Introduces ResourceRequirements for WorkflowController in DSPA CRD - Adds dynamic request and limit defaults for WC deployment --- api/v1alpha1/dspipeline_types.go | 2 ++ api/v1alpha1/zz_generated.deepcopy.go | 7 +++- ...b.io_datasciencepipelinesapplications.yaml | 35 +++++++++++++++++++ .../workflow-controller/deployment.yaml.tmpl | 21 +++++++++-- controllers/config/defaults.go | 19 +++++----- controllers/dspipeline_params.go | 1 + 6 files changed, 72 insertions(+), 13 deletions(-) diff --git a/api/v1alpha1/dspipeline_types.go b/api/v1alpha1/dspipeline_types.go index 75652665d..29cff1a58 100644 --- a/api/v1alpha1/dspipeline_types.go +++ b/api/v1alpha1/dspipeline_types.go @@ -331,6 +331,8 @@ type WorkflowController struct { Image string `json:"image,omitempty"` ArgoExecImage string `json:"argoExecImage,omitempty"` CustomConfig string `json:"customConfig,omitempty"` + // Specify custom Pod resource requirements for this component. + Resources *ResourceRequirements `json:"resources,omitempty"` } // ResourceRequirements structures compute resource requirements. diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index d918f712d..8e531fee6 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -117,7 +117,7 @@ func (in *DSPASpec) DeepCopyInto(out *DSPASpec) { if in.WorkflowController != nil { in, out := &in.WorkflowController, &out.WorkflowController *out = new(WorkflowController) - **out = **in + (*in).DeepCopyInto(*out) } } @@ -584,6 +584,11 @@ func (in *SecretKeyValue) DeepCopy() *SecretKeyValue { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
func (in *WorkflowController) DeepCopyInto(out *WorkflowController) { *out = *in + if in.Resources != nil { + in, out := &in.Resources, &out.Resources + *out = new(ResourceRequirements) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new WorkflowController. diff --git a/config/crd/bases/datasciencepipelinesapplications.opendatahub.io_datasciencepipelinesapplications.yaml b/config/crd/bases/datasciencepipelinesapplications.opendatahub.io_datasciencepipelinesapplications.yaml index 749ff2edf..d3c139b89 100644 --- a/config/crd/bases/datasciencepipelinesapplications.opendatahub.io_datasciencepipelinesapplications.yaml +++ b/config/crd/bases/datasciencepipelinesapplications.opendatahub.io_datasciencepipelinesapplications.yaml @@ -818,6 +818,41 @@ spec: type: boolean image: type: string + resources: + description: Specify custom Pod resource requirements for this + component. + properties: + limits: + properties: + cpu: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + memory: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + requests: + properties: + cpu: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + memory: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + type: object + type: object type: object required: - objectStorage diff --git 
a/config/internal/workflow-controller/deployment.yaml.tmpl b/config/internal/workflow-controller/deployment.yaml.tmpl index c4a502a86..1df708492 100644 --- a/config/internal/workflow-controller/deployment.yaml.tmpl +++ b/config/internal/workflow-controller/deployment.yaml.tmpl @@ -57,9 +57,24 @@ spec: name: metrics - containerPort: 6060 resources: - requests: - cpu: 100m - memory: 500Mi + {{ if .WorkflowController.Resources.Requests }} + requests: + {{ if .WorkflowController.Resources.Requests.CPU }} + cpu: {{.WorkflowController.Resources.Requests.CPU}} + {{ end }} + {{ if .WorkflowController.Resources.Requests.Memory }} + memory: {{.WorkflowController.Resources.Requests.Memory}} + {{ end }} + {{ end }} + {{ if .WorkflowController.Resources.Limits }} + limits: + {{ if .WorkflowController.Resources.Limits.CPU }} + cpu: {{.WorkflowController.Resources.Limits.CPU}} + {{ end }} + {{ if .WorkflowController.Resources.Limits.Memory }} + memory: {{.WorkflowController.Resources.Limits.Memory}} + {{ end }} + {{ end }} securityContext: allowPrivilegeEscalation: false capabilities: diff --git a/controllers/config/defaults.go b/controllers/config/defaults.go index 5af701435..a7dbe37d5 100644 --- a/controllers/config/defaults.go +++ b/controllers/config/defaults.go @@ -181,15 +181,16 @@ func GetConfigRequiredFields() []string { // Default ResourceRequirements var ( - APIServerResourceRequirements = createResourceRequirement(resource.MustParse("250m"), resource.MustParse("500Mi"), resource.MustParse("500m"), resource.MustParse("1Gi")) - PersistenceAgentResourceRequirements = createResourceRequirement(resource.MustParse("120m"), resource.MustParse("500Mi"), resource.MustParse("250m"), resource.MustParse("1Gi")) - ScheduledWorkflowResourceRequirements = createResourceRequirement(resource.MustParse("120m"), resource.MustParse("100Mi"), resource.MustParse("250m"), resource.MustParse("250Mi")) - MariaDBResourceRequirements = createResourceRequirement(resource.MustParse("300m"), 
resource.MustParse("800Mi"), resource.MustParse("1"), resource.MustParse("1Gi")) - MinioResourceRequirements = createResourceRequirement(resource.MustParse("200m"), resource.MustParse("100Mi"), resource.MustParse("250m"), resource.MustParse("1Gi")) - MlPipelineUIResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) - MlmdEnvoyResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) - MlmdGRPCResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) - MlmdWriterResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) + APIServerResourceRequirements = createResourceRequirement(resource.MustParse("250m"), resource.MustParse("500Mi"), resource.MustParse("500m"), resource.MustParse("1Gi")) + PersistenceAgentResourceRequirements = createResourceRequirement(resource.MustParse("120m"), resource.MustParse("500Mi"), resource.MustParse("250m"), resource.MustParse("1Gi")) + ScheduledWorkflowResourceRequirements = createResourceRequirement(resource.MustParse("120m"), resource.MustParse("100Mi"), resource.MustParse("250m"), resource.MustParse("250Mi")) + WorkflowControllerResourceRequirements = createResourceRequirement(resource.MustParse("120m"), resource.MustParse("500Mi"), resource.MustParse("250m"), resource.MustParse("1Gi")) + MariaDBResourceRequirements = createResourceRequirement(resource.MustParse("300m"), resource.MustParse("800Mi"), resource.MustParse("1"), resource.MustParse("1Gi")) + MinioResourceRequirements = createResourceRequirement(resource.MustParse("200m"), resource.MustParse("100Mi"), resource.MustParse("250m"), resource.MustParse("1Gi")) + 
MlPipelineUIResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) + MlmdEnvoyResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) + MlmdGRPCResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) + MlmdWriterResourceRequirements = createResourceRequirement(resource.MustParse("100m"), resource.MustParse("256Mi"), resource.MustParse("100m"), resource.MustParse("256Mi")) ) type DBExtraParams map[string]string diff --git a/controllers/dspipeline_params.go b/controllers/dspipeline_params.go index e95c863a2..9c0434cd3 100644 --- a/controllers/dspipeline_params.go +++ b/controllers/dspipeline_params.go @@ -804,6 +804,7 @@ func (p *DSPAParams) ExtractParams(ctx context.Context, dsp *dspa.DataSciencePip argoExecImageFromConfig := config.GetStringConfigWithDefault(config.ArgoExecImagePath, config.DefaultImageValue) setStringDefault(argoWorkflowImageFromConfig, &p.WorkflowController.Image) setStringDefault(argoExecImageFromConfig, &p.WorkflowController.ArgoExecImage) + setResourcesDefault(config.WorkflowControllerResourceRequirements, &p.WorkflowController.Resources) } err := p.SetupMLMD(dsp, log) From fa280ae06235202db3e69c4249690498fbea11cf Mon Sep 17 00:00:00 2001 From: Giulio Frasca Date: Tue, 25 Jun 2024 14:47:55 -0400 Subject: [PATCH 2/2] Add WorkflowController item to dspa_all_fields sample --- .../samples/v2/dspa-all-fields/dspa_all_fields.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/config/samples/v2/dspa-all-fields/dspa_all_fields.yaml b/config/samples/v2/dspa-all-fields/dspa_all_fields.yaml index 31815994b..1af41df21 100644 --- a/config/samples/v2/dspa-all-fields/dspa_all_fields.yaml +++ 
b/config/samples/v2/dspa-all-fields/dspa_all_fields.yaml @@ -101,6 +101,18 @@ spec: requests: cpu: 100m memory: 256Mi + workflowController: + deploy: true + image: quay.io/opendatahub/ds-pipelines-argo-workflowcontroller:3.3.10-upstream + argoExecImage: quay.io/opendatahub/ds-pipelines-argo-argoexec:3.3.10-upstream + customConfig: some-custom-workflowcontroller-configmap # see ../custom-workflow-controller-config for example + resources: + requests: + cpu: 120m + memory: 500Mi + limits: + cpu: 250m + memory: 1Gi database: disableHealthCheck: false # possible values for tls: true, false, skip-verify