Skip to content

Commit

Permalink
Initial rework of manifests
Browse files Browse the repository at this point in the history
Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com>
  • Loading branch information
israel-hdez committed Dec 13, 2024
1 parent d6b0ee3 commit c82277f
Show file tree
Hide file tree
Showing 45 changed files with 1,341 additions and 97 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ help: ## Display this help.

.PHONY: manifests
manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects.
# Any customization needed, apply to the webhook_patch.yaml file
# Apply any needed customization as a patch in the webhooks kustomization.yaml file
$(CONTROLLER_GEN) rbac:roleName=odh-model-controller-role,headerFile="hack/manifests_boilerplate.yaml.txt" crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases

# TODO: Evaluate if this is still needed
Expand Down
2 changes: 1 addition & 1 deletion cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ func main() {
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
flag.BoolVar(&secureMetrics, "metrics-secure", false,
flag.BoolVar(&secureMetrics, "metrics-secure", false, // TODO: restore to true by default.
"If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.")
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
Expand Down
140 changes: 140 additions & 0 deletions config/base/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
# Base overlay: builds on ../default, targets the opendatahub namespace, and
# injects the image references declared in the params*.env files into the
# serving-runtime Templates and the controller Deployment.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
  - ../default

namespace: opendatahub

# Collect every key=value pair from the env files into a single ConfigMap;
# the replacements below read their values from it.
configMapGenerator:
  - envs:
      - params.env
      - params-vllm-rocm.env
      - params-vllm-gaudi.env
    name: odh-model-controller-parameters
generatorOptions:
  # Emit the ConfigMap under its literal name, without a content-hash suffix.
  disableNameSuffixHash: true

# Each entry copies one field of the generated ConfigMap into the listed
# field paths of the selected target resources.
replacements:
  # caikit-tgis template: container 0 takes the TGIS image ...
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.tgis-image
    targets:
      - select:
          kind: Template
          name: caikit-tgis-serving-template
        fieldPaths:
          - objects.0.spec.containers.0.image
  # ... and container 1 takes the caikit-tgis image.
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.caikit-tgis-image
    targets:
      - select:
          kind: Template
          name: caikit-tgis-serving-template
        fieldPaths:
          - objects.0.spec.containers.1.image
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.caikit-standalone-image
    targets:
      - select:
          kind: Template
          name: caikit-standalone-serving-template
        fieldPaths:
          - objects.0.spec.containers.0.image
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.tgis-image
    targets:
      - select:
          kind: Template
          name: tgis-grpc-serving-template
        fieldPaths:
          - objects.0.spec.containers.0.image
  # The OVMS image is shared by the kserve-ovms and ovms templates.
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.ovms-image
    targets:
      - select:
          kind: Template
          name: kserve-ovms
        fieldPaths:
          - objects.0.spec.containers.0.image
      - select:
          kind: Template
          name: ovms
        fieldPaths:
          - objects.0.spec.containers.0.image
  # The vLLM image feeds the single-node template and both the main and
  # workerSpec containers of the multi-node template.
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.vllm-image
    targets:
      - select:
          kind: Template
          name: vllm-runtime-template
        fieldPaths:
          - objects.0.spec.containers.0.image
      - select:
          kind: Template
          name: vllm-multinode-runtime-template
        fieldPaths:
          - objects.0.spec.containers.0.image
          - objects.0.spec.workerSpec.containers.0.image
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.vllm-rocm-image
    targets:
      - select:
          kind: Template
          name: vllm-rocm-runtime-template
        fieldPaths:
          - objects.0.spec.containers.0.image
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.vllm-gaudi-image
    targets:
      - select:
          kind: Template
          name: vllm-gaudi-runtime-template
        fieldPaths:
          - objects.0.spec.containers.0.image
  # Controller image for the manager Deployment.
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: data.odh-model-controller
    targets:
      - select:
          kind: Deployment
          name: odh-model-controller
        fieldPaths:
          - spec.template.spec.containers.0.image
  # Copy the ConfigMap's own namespace into the webhook's service reference.
  # NOTE(review): presumably this resolves to the `namespace` set above;
  # confirm against the rendered output.
  - source:
      kind: ConfigMap
      version: v1
      name: odh-model-controller-parameters
      fieldPath: metadata.namespace
    targets:
      - select:
          kind: ValidatingWebhookConfiguration
          name: validating-webhook-configuration
        fieldPaths:
          - webhooks.0.clientConfig.service.namespace

patches:
  - path: remove-namespace.yaml
12 changes: 12 additions & 0 deletions config/base/odh_model_controller_manager_patch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Patch for the odh-model-controller Deployment: sets the args, image and
# name of its `manager` container entry.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: odh-model-controller
spec:
  template:
    spec:
      containers:
        - args:
            - --leader-elect
          # NOTE(review): `$(odh-model-controller)` looks like a
          # substitution placeholder; confirm what replaces it at build time.
          image: $(odh-model-controller)
          name: manager
1 change: 1 addition & 0 deletions config/base/params-vllm-gaudi.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
vllm-gaudi-image=quay.io/opendatahub/vllm:fast-gaudi
1 change: 1 addition & 0 deletions config/base/params-vllm-rocm.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
vllm-rocm-image=quay.io/opendatahub/vllm:fast-rocm
7 changes: 7 additions & 0 deletions config/base/params.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
odh-model-controller=quay.io/opendatahub/odh-model-controller:fast
caikit-tgis-image=quay.io/opendatahub/caikit-tgis-serving:fast
caikit-standalone-image=quay.io/opendatahub/caikit-nlp:fast
tgis-image=quay.io/opendatahub/text-generation-inference:fast
ovms-image=quay.io/opendatahub/openvino_model_server:2024.3-release
vllm-image=quay.io/opendatahub/vllm:fast
nim-state=removed
6 changes: 6 additions & 0 deletions config/base/remove-namespace.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Remove the Namespace resource from the rendered output, because the target
# namespace will already exist.
# `$patch: delete` is the strategic-merge-patch directive that deletes the
# resource matched by the apiVersion/kind/name below.
$patch: delete
apiVersion: v1
kind: Namespace
metadata:
  name: system
4 changes: 2 additions & 2 deletions config/crd/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,5 @@ patches:

# [WEBHOOK] To enable webhook, uncomment the following section
# the following config is for teaching kustomize how to do kustomization for CRDs.
#configurations:
#- kustomizeconfig.yaml
configurations:
- kustomizeconfig.yaml
7 changes: 4 additions & 3 deletions config/default/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace: odh-model-controller-system
# "wordpress" becomes "alices-wordpress".
# Note that it should also match with the prefix (text before '-') of the namespace
# field above.
namePrefix: odh-model-controller-
# namePrefix: odh-model-controller-

# Labels to add to all resources and selectors.
#labels:
Expand All @@ -24,14 +24,15 @@ resources:
# [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required.
#- ../certmanager
# [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'.
#- ../prometheus
- ../prometheus
# [METRICS] Expose the controller manager metrics service.
- metrics_service.yaml
# [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy.
# Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics.
# Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will
# be able to communicate with the Webhook Server.
#- ../network-policy
- ../network-policy
- ../runtimes

# Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager
patches:
Expand Down
3 changes: 2 additions & 1 deletion config/default/manager_metrics_patch.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# This patch adds the args to allow exposing the metrics endpoint using HTTPS
# TODO: restore to 8443 port
- op: add
path: /spec/template/spec/containers/0/args/0
value: --metrics-bind-address=:8443
value: --metrics-bind-address=:8080
10 changes: 5 additions & 5 deletions config/default/manager_webhook_patch.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
name: odh-model-controller
namespace: system
labels:
app.kubernetes.io/name: odh-model-controller
app.kubernetes.io/managed-by: kustomize
# labels:
# app.kubernetes.io/name: odh-model-controller
# app.kubernetes.io/managed-by: kustomize
spec:
template:
spec:
Expand All @@ -23,4 +23,4 @@ spec:
- name: cert
secret:
defaultMode: 420
secretName: webhook-server-cert
secretName: odh-model-controller-webhook-cert
16 changes: 8 additions & 8 deletions config/default/metrics_service.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,16 @@ apiVersion: v1
kind: Service
metadata:
labels:
control-plane: controller-manager
app.kubernetes.io/name: odh-model-controller
app.kubernetes.io/managed-by: kustomize
name: controller-manager-metrics-service
control-plane: odh-model-controller
# app.kubernetes.io/name: odh-model-controller
# app.kubernetes.io/managed-by: kustomize
name: odh-model-controller-metrics-service
namespace: system
spec:
ports:
- name: https
port: 8443
- name: http # TODO: Restore to https
port: 8080 # TODO: Use TLS and change to 8443
protocol: TCP
targetPort: 8443
targetPort: 8080 # TODO: Use TLS and change to 8443
selector:
control-plane: controller-manager
control-plane: odh-model-controller
54 changes: 44 additions & 10 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ metadata:
apiVersion: apps/v1
kind: Deployment
metadata:
name: controller-manager
name: odh-model-controller
namespace: system
labels:
control-plane: controller-manager
app.kubernetes.io/name: odh-model-controller
app.kubernetes.io/managed-by: kustomize
control-plane: odh-model-controller
app: odh-model-controller
spec:
selector:
matchLabels:
control-plane: controller-manager
control-plane: odh-model-controller
replicas: 1
template:
metadata:
annotations:
kubectl.kubernetes.io/default-container: manager
labels:
control-plane: controller-manager
control-plane: odh-model-controller
app: odh-model-controller
spec:
# TODO(user): Uncomment the following code to configure the nodeAffinity expression
# according to the platforms which are supported by your solution.
Expand Down Expand Up @@ -65,11 +65,47 @@ spec:
- --health-probe-bind-address=:8081
image: controller:latest
name: manager
imagePullPolicy: Always
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- "ALL"
env:
- name: POD_NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: AUTH_AUDIENCE
valueFrom:
configMapKeyRef:
name: auth-refs
key: AUTH_AUDIENCE
optional: true
- name: AUTHORINO_LABEL
valueFrom:
configMapKeyRef:
name: auth-refs
key: AUTHORINO_LABEL
optional: true
- name: CONTROL_PLANE_NAME
valueFrom:
configMapKeyRef:
name: service-mesh-refs
key: CONTROL_PLANE_NAME
optional: true
- name: MESH_NAMESPACE
valueFrom:
configMapKeyRef:
name: service-mesh-refs
key: MESH_NAMESPACE
optional: true
- name: NIM_STATE
valueFrom:
configMapKeyRef:
name: odh-model-controller-parameters
key: nim-state
optional: true
livenessProbe:
httpGet:
path: /healthz
Expand All @@ -82,14 +118,12 @@ spec:
port: 8081
initialDelaySeconds: 5
periodSeconds: 10
# TODO(user): Configure the resources accordingly based on the project requirements.
# More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
resources:
limits:
cpu: 500m
memory: 128Mi
memory: 2Gi
requests:
cpu: 10m
memory: 64Mi
serviceAccountName: controller-manager
serviceAccountName: odh-model-controller
terminationGracePeriodSeconds: 10
4 changes: 2 additions & 2 deletions config/network-policy/allow-webhook-traffic.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@ metadata:
labels:
app.kubernetes.io/name: odh-model-controller
app.kubernetes.io/managed-by: kustomize
name: allow-webhook-traffic
name: odh-model-controller # Original scaffolded name is allow-webhook-traffic
namespace: system
spec:
podSelector:
matchLabels:
control-plane: controller-manager
control-plane: odh-model-controller
policyTypes:
- Ingress
ingress:
Expand Down
2 changes: 1 addition & 1 deletion config/network-policy/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
resources:
- allow-webhook-traffic.yaml
- allow-metrics-traffic.yaml
#- allow-metrics-traffic.yaml
Loading

0 comments on commit c82277f

Please sign in to comment.