From c82277f2ffda06e3e2f79d78b1bbc2d817e9e499 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Hern=C3=A1ndez?= <23639005+israel-hdez@users.noreply.github.com> Date: Thu, 12 Dec 2024 12:41:28 -0600 Subject: [PATCH] Initial rework of manifests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Edgar Hernández <23639005+israel-hdez@users.noreply.github.com> --- Makefile | 2 +- cmd/main.go | 2 +- config/base/kustomization.yaml | 140 +++++++++ .../odh_model_controller_manager_patch.yaml | 12 + config/base/params-vllm-gaudi.env | 1 + config/base/params-vllm-rocm.env | 1 + config/base/params.env | 7 + config/base/remove-namespace.yaml | 6 + config/crd/kustomization.yaml | 4 +- config/default/kustomization.yaml | 7 +- config/default/manager_metrics_patch.yaml | 3 +- config/default/manager_webhook_patch.yaml | 10 +- config/default/metrics_service.yaml | 16 +- config/manager/manager.yaml | 54 +++- .../network-policy/allow-webhook-traffic.yaml | 4 +- config/network-policy/kustomization.yaml | 2 +- config/prometheus/monitor.yaml | 14 +- config/rbac/auth_proxy_role.yaml | 17 ++ config/rbac/auth_proxy_role_binding.yaml | 11 + .../rbac/kserve_prometheus_clusterrole.yaml | 15 + config/rbac/kustomization.yaml | 13 +- config/rbac/leader_election_role.yaml | 6 +- config/rbac/leader_election_role_binding.yaml | 8 +- config/rbac/role.yaml | 205 ++++++++++++- config/rbac/role_binding.yaml | 12 +- config/rbac/service_account.yaml | 8 +- .../runtimes/caikit-standalone-template.yaml | 76 +++++ config/runtimes/caikit-tgis-template.yaml | 58 ++++ config/runtimes/kustomization.yaml | 17 ++ config/runtimes/ovms-kserve-template.yaml | 64 ++++ config/runtimes/ovms-mm-template.yaml | 65 ++++ config/runtimes/tgis-template.yaml | 49 +++ config/runtimes/vllm-gaudi-template.yaml | 53 ++++ config/runtimes/vllm-multinode-template.yaml | 279 ++++++++++++++++++ config/runtimes/vllm-rocm-template.yaml | 51 ++++ config/runtimes/vllm-template.yaml | 51 
++++ config/webhook/kustomization.yaml | 40 +++ config/webhook/manifests.yaml | 2 +- config/webhook/service.yaml | 12 +- .../controller/core/configmap_controller.go | 2 - internal/controller/core/secret_controller.go | 2 - internal/controller/nim/account_controller.go | 5 +- .../serving/inferenceservice_controller.go | 27 +- .../serving/servingruntime_controller.go | 3 +- .../v1beta1/inferenceservice_webhook.go | 2 +- 45 files changed, 1341 insertions(+), 97 deletions(-) create mode 100644 config/base/kustomization.yaml create mode 100644 config/base/odh_model_controller_manager_patch.yaml create mode 100644 config/base/params-vllm-gaudi.env create mode 100644 config/base/params-vllm-rocm.env create mode 100644 config/base/params.env create mode 100644 config/base/remove-namespace.yaml create mode 100644 config/rbac/auth_proxy_role.yaml create mode 100644 config/rbac/auth_proxy_role_binding.yaml create mode 100644 config/rbac/kserve_prometheus_clusterrole.yaml create mode 100644 config/runtimes/caikit-standalone-template.yaml create mode 100644 config/runtimes/caikit-tgis-template.yaml create mode 100644 config/runtimes/kustomization.yaml create mode 100644 config/runtimes/ovms-kserve-template.yaml create mode 100644 config/runtimes/ovms-mm-template.yaml create mode 100644 config/runtimes/tgis-template.yaml create mode 100644 config/runtimes/vllm-gaudi-template.yaml create mode 100644 config/runtimes/vllm-multinode-template.yaml create mode 100644 config/runtimes/vllm-rocm-template.yaml create mode 100644 config/runtimes/vllm-template.yaml diff --git a/Makefile b/Makefile index abf808f2..efdab541 100644 --- a/Makefile +++ b/Makefile @@ -46,7 +46,7 @@ help: ## Display this help. .PHONY: manifests manifests: controller-gen ## Generate WebhookConfiguration, ClusterRole and CustomResourceDefinition objects. 
- # Any customization needed, apply to the webhook_patch.yaml file + # Any customization needed, apply to a patch in the kustomize.yaml file on webhooks $(CONTROLLER_GEN) rbac:roleName=odh-model-controller-role,headerFile="hack/manifests_boilerplate.yaml.txt" crd webhook paths="./..." output:crd:artifacts:config=config/crd/bases # TODO: Evaluate if this is still needed diff --git a/cmd/main.go b/cmd/main.go index e716673a..623135e6 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -85,7 +85,7 @@ func main() { flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") - flag.BoolVar(&secureMetrics, "metrics-secure", false, + flag.BoolVar(&secureMetrics, "metrics-secure", false, // TODO: restore to true by default. "If set, the metrics endpoint is served securely via HTTPS. Use --metrics-secure=false to use HTTP instead.") flag.BoolVar(&enableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") diff --git a/config/base/kustomization.yaml b/config/base/kustomization.yaml new file mode 100644 index 00000000..7bf66c58 --- /dev/null +++ b/config/base/kustomization.yaml @@ -0,0 +1,140 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: + - ../default + +namespace: opendatahub +configMapGenerator: + - envs: + - params.env + - params-vllm-rocm.env + - params-vllm-gaudi.env + name: odh-model-controller-parameters +generatorOptions: + disableNameSuffixHash: true + +replacements: + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.tgis-image + targets: + - select: + kind: Template + name: caikit-tgis-serving-template + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.caikit-tgis-image + targets: + - select: + kind: Template + name: 
caikit-tgis-serving-template + fieldPaths: + - objects.0.spec.containers.1.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.caikit-standalone-image + targets: + - select: + kind: Template + name: caikit-standalone-serving-template + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.tgis-image + targets: + - select: + kind: Template + name: tgis-grpc-serving-template + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.ovms-image + targets: + - select: + kind: Template + name: kserve-ovms + fieldPaths: + - objects.0.spec.containers.0.image + - select: + kind: Template + name: ovms + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.vllm-image + targets: + - select: + kind: Template + name: vllm-runtime-template + fieldPaths: + - objects.0.spec.containers.0.image + - select: + kind: Template + name: vllm-multinode-runtime-template + fieldPaths: + - objects.0.spec.containers.0.image + - objects.0.spec.workerSpec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.vllm-rocm-image + targets: + - select: + kind: Template + name: vllm-rocm-runtime-template + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.vllm-gaudi-image + targets: + - select: + kind: Template + name: vllm-gaudi-runtime-template + fieldPaths: + - objects.0.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: data.odh-model-controller + targets: + - select: + kind: Deployment + name: odh-model-controller + 
fieldPaths: + - spec.template.spec.containers.0.image + - source: + kind: ConfigMap + version: v1 + name: odh-model-controller-parameters + fieldPath: metadata.namespace + targets: + - select: + kind: ValidatingWebhookConfiguration + name: validating-webhook-configuration + fieldPaths: + - webhooks.0.clientConfig.service.namespace + +patches: +- path: remove-namespace.yaml diff --git a/config/base/odh_model_controller_manager_patch.yaml b/config/base/odh_model_controller_manager_patch.yaml new file mode 100644 index 00000000..7d4dcda2 --- /dev/null +++ b/config/base/odh_model_controller_manager_patch.yaml @@ -0,0 +1,12 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: odh-model-controller +spec: + template: + spec: + containers: + - args: + - --leader-elect + image: $(odh-model-controller) + name: manager diff --git a/config/base/params-vllm-gaudi.env b/config/base/params-vllm-gaudi.env new file mode 100644 index 00000000..10d285ec --- /dev/null +++ b/config/base/params-vllm-gaudi.env @@ -0,0 +1 @@ +vllm-gaudi-image=quay.io/opendatahub/vllm:fast-gaudi \ No newline at end of file diff --git a/config/base/params-vllm-rocm.env b/config/base/params-vllm-rocm.env new file mode 100644 index 00000000..08622f43 --- /dev/null +++ b/config/base/params-vllm-rocm.env @@ -0,0 +1 @@ +vllm-rocm-image=quay.io/opendatahub/vllm:fast-rocm \ No newline at end of file diff --git a/config/base/params.env b/config/base/params.env new file mode 100644 index 00000000..461891f0 --- /dev/null +++ b/config/base/params.env @@ -0,0 +1,7 @@ +odh-model-controller=quay.io/opendatahub/odh-model-controller:fast +caikit-tgis-image=quay.io/opendatahub/caikit-tgis-serving:fast +caikit-standalone-image=quay.io/opendatahub/caikit-nlp:fast +tgis-image=quay.io/opendatahub/text-generation-inference:fast +ovms-image=quay.io/opendatahub/openvino_model_server:2024.3-release +vllm-image=quay.io/opendatahub/vllm:fast +nim-state=removed diff --git a/config/base/remove-namespace.yaml 
b/config/base/remove-namespace.yaml new file mode 100644 index 00000000..b68652c5 --- /dev/null +++ b/config/base/remove-namespace.yaml @@ -0,0 +1,6 @@ +# Remove namespace resource as namespace will already exist. +$patch: delete +apiVersion: v1 +kind: Namespace +metadata: + name: system diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 7aaf6628..d175fa93 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -16,5 +16,5 @@ patches: # [WEBHOOK] To enable webhook, uncomment the following section # the following config is for teaching kustomize how to do kustomization for CRDs. -#configurations: -#- kustomizeconfig.yaml +configurations: +- kustomizeconfig.yaml diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index 0b8131fc..17f63d37 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -6,7 +6,7 @@ namespace: odh-model-controller-system # "wordpress" becomes "alices-wordpress". # Note that it should also match with the prefix (text before '-') of the namespace # field above. -namePrefix: odh-model-controller- +# namePrefix: odh-model-controller- # Labels to add to all resources and selectors. #labels: @@ -24,14 +24,15 @@ resources: # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER'. 'WEBHOOK' components are required. #- ../certmanager # [PROMETHEUS] To enable prometheus monitor, uncomment all sections with 'PROMETHEUS'. -#- ../prometheus +- ../prometheus # [METRICS] Expose the controller manager metrics service. - metrics_service.yaml # [NETWORK POLICY] Protect the /metrics endpoint and Webhook Server with NetworkPolicy. # Only Pod(s) running a namespace labeled with 'metrics: enabled' will be able to gather the metrics. # Only CR(s) which requires webhooks and are applied on namespaces labeled with 'webhooks: enabled' will # be able to communicate with the Webhook Server. 
-#- ../network-policy +- ../network-policy +- ../runtimes # Uncomment the patches line if you enable Metrics, and/or are using webhooks and cert-manager patches: diff --git a/config/default/manager_metrics_patch.yaml b/config/default/manager_metrics_patch.yaml index 2aaef653..dee2761d 100644 --- a/config/default/manager_metrics_patch.yaml +++ b/config/default/manager_metrics_patch.yaml @@ -1,4 +1,5 @@ # This patch adds the args to allow exposing the metrics endpoint using HTTPS +# TODO: restore to 8443 port - op: add path: /spec/template/spec/containers/0/args/0 - value: --metrics-bind-address=:8443 + value: --metrics-bind-address=:8080 diff --git a/config/default/manager_webhook_patch.yaml b/config/default/manager_webhook_patch.yaml index 593208fe..3f9e0e84 100644 --- a/config/default/manager_webhook_patch.yaml +++ b/config/default/manager_webhook_patch.yaml @@ -1,11 +1,11 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager + name: odh-model-controller namespace: system - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize spec: template: spec: @@ -23,4 +23,4 @@ spec: - name: cert secret: defaultMode: 420 - secretName: webhook-server-cert + secretName: odh-model-controller-webhook-cert diff --git a/config/default/metrics_service.yaml b/config/default/metrics_service.yaml index 2c660b6f..feff3f86 100644 --- a/config/default/metrics_service.yaml +++ b/config/default/metrics_service.yaml @@ -2,16 +2,16 @@ apiVersion: v1 kind: Service metadata: labels: - control-plane: controller-manager - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize - name: controller-manager-metrics-service + control-plane: odh-model-controller +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize + name: odh-model-controller-metrics-service namespace: 
system spec: ports: - - name: https - port: 8443 + - name: http # TODO: Restore to http + port: 8080 # TODO: Use TLS and change to 8443 protocol: TCP - targetPort: 8443 + targetPort: 8080 # TODO: Use TLS and change to 8443 selector: - control-plane: controller-manager + control-plane: odh-model-controller diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index b95d60e9..59b4907c 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -10,23 +10,23 @@ metadata: apiVersion: apps/v1 kind: Deployment metadata: - name: controller-manager + name: odh-model-controller namespace: system labels: - control-plane: controller-manager - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize + control-plane: odh-model-controller + app: odh-model-controller spec: selector: matchLabels: - control-plane: controller-manager + control-plane: odh-model-controller replicas: 1 template: metadata: annotations: kubectl.kubernetes.io/default-container: manager labels: - control-plane: controller-manager + control-plane: odh-model-controller + app: odh-model-controller spec: # TODO(user): Uncomment the following code to configure the nodeAffinity expression # according to the platforms which are supported by your solution. 
@@ -65,11 +65,47 @@ spec: - --health-probe-bind-address=:8081 image: controller:latest name: manager + imagePullPolicy: Always securityContext: allowPrivilegeEscalation: false capabilities: drop: - "ALL" + env: + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: AUTH_AUDIENCE + valueFrom: + configMapKeyRef: + name: auth-refs + key: AUTH_AUDIENCE + optional: true + - name: AUTHORINO_LABEL + valueFrom: + configMapKeyRef: + name: auth-refs + key: AUTHORINO_LABEL + optional: true + - name: CONTROL_PLANE_NAME + valueFrom: + configMapKeyRef: + name: service-mesh-refs + key: CONTROL_PLANE_NAME + optional: true + - name: MESH_NAMESPACE + valueFrom: + configMapKeyRef: + name: service-mesh-refs + key: MESH_NAMESPACE + optional: true + - name: NIM_STATE + valueFrom: + configMapKeyRef: + name: odh-model-controller-parameters + key: nim-state + optional: true livenessProbe: httpGet: path: /healthz @@ -82,14 +118,12 @@ spec: port: 8081 initialDelaySeconds: 5 periodSeconds: 10 - # TODO(user): Configure the resources accordingly based on the project requirements. 
- # More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ resources: limits: cpu: 500m - memory: 128Mi + memory: 2Gi requests: cpu: 10m memory: 64Mi - serviceAccountName: controller-manager + serviceAccountName: odh-model-controller terminationGracePeriodSeconds: 10 diff --git a/config/network-policy/allow-webhook-traffic.yaml b/config/network-policy/allow-webhook-traffic.yaml index 089d1f85..61b5798c 100644 --- a/config/network-policy/allow-webhook-traffic.yaml +++ b/config/network-policy/allow-webhook-traffic.yaml @@ -7,12 +7,12 @@ metadata: labels: app.kubernetes.io/name: odh-model-controller app.kubernetes.io/managed-by: kustomize - name: allow-webhook-traffic + name: odh-model-controller # Original scaffolded name is allow-webhook-traffic namespace: system spec: podSelector: matchLabels: - control-plane: controller-manager + control-plane: odh-model-controller policyTypes: - Ingress ingress: diff --git a/config/network-policy/kustomization.yaml b/config/network-policy/kustomization.yaml index 0872bee1..638c3ef7 100644 --- a/config/network-policy/kustomization.yaml +++ b/config/network-policy/kustomization.yaml @@ -1,3 +1,3 @@ resources: - allow-webhook-traffic.yaml -- allow-metrics-traffic.yaml +#- allow-metrics-traffic.yaml diff --git a/config/prometheus/monitor.yaml b/config/prometheus/monitor.yaml index 666ea91d..435e1b0d 100644 --- a/config/prometheus/monitor.yaml +++ b/config/prometheus/monitor.yaml @@ -3,10 +3,10 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: - control-plane: controller-manager - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize - name: controller-manager-metrics-monitor + control-plane: odh-model-controller +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize + name: odh-model-controller-metrics-monitor namespace: system spec: endpoints: @@ -15,7 +15,7 @@ spec: scheme: https bearerTokenFile: 
/var/run/secrets/kubernetes.io/serviceaccount/token tlsConfig: - # TODO(user): The option insecureSkipVerify: true is not recommended for production since it disables + # The option insecureSkipVerify: true is not recommended for production since it disables # certificate verification. This poses a significant security risk by making the system vulnerable to # man-in-the-middle attacks, where an attacker could intercept and manipulate the communication between # Prometheus and the monitored services. This could lead to unauthorized access to sensitive metrics data, @@ -24,7 +24,7 @@ spec: # caFile: /etc/metrics-certs/ca.crt # certFile: /etc/metrics-certs/tls.crt # keyFile: /etc/metrics-certs/tls.key - insecureSkipVerify: true + insecureSkipVerify: false selector: matchLabels: - control-plane: controller-manager + control-plane: odh-model-controller diff --git a/config/rbac/auth_proxy_role.yaml b/config/rbac/auth_proxy_role.yaml new file mode 100644 index 00000000..2e55d6ae --- /dev/null +++ b/config/rbac/auth_proxy_role.yaml @@ -0,0 +1,17 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: proxy-role +rules: + - apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create + - apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create diff --git a/config/rbac/auth_proxy_role_binding.yaml b/config/rbac/auth_proxy_role_binding.yaml new file mode 100644 index 00000000..807b12e8 --- /dev/null +++ b/config/rbac/auth_proxy_role_binding.yaml @@ -0,0 +1,11 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: proxy-rolebinding +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: proxy-role +subjects: + - kind: ServiceAccount + name: odh-model-controller diff --git a/config/rbac/kserve_prometheus_clusterrole.yaml b/config/rbac/kserve_prometheus_clusterrole.yaml new file mode 100644 index 00000000..180d0e98 --- /dev/null +++ 
b/config/rbac/kserve_prometheus_clusterrole.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: kserve-prometheus-k8s +rules: +- apiGroups: + - "" + resources: + - services + - endpoints + - pods + verbs: + - get + - list + - watch diff --git a/config/rbac/kustomization.yaml b/config/rbac/kustomization.yaml index a6867d2e..15e38921 100644 --- a/config/rbac/kustomization.yaml +++ b/config/rbac/kustomization.yaml @@ -1,4 +1,5 @@ resources: +- kserve_prometheus_clusterrole.yaml # All RBAC will be applied under this service account in # the deployment namespace. You may comment out this resource # if your manager will use a service account that exists at @@ -15,13 +16,15 @@ resources: # can access the metrics endpoint. Comment the following # permissions if you want to disable this protection. # More info: https://book.kubebuilder.io/reference/metrics.html -- metrics_auth_role.yaml -- metrics_auth_role_binding.yaml +#- metrics_auth_role.yaml +#- metrics_auth_role_binding.yaml - metrics_reader_role.yaml # For each CRD, "Editor" and "Viewer" roles are scaffolded by # default, aiding admins in cluster management. Those roles are # not used by the Project itself. You can comment the following lines # if you do not want those helpers be installed with your Project. 
-- account_editor_role.yaml -- account_viewer_role.yaml - +#- account_editor_role.yaml +#- account_viewer_role.yaml +# The following RBAC configurations are for Authorino +- auth_proxy_role.yaml +- auth_proxy_role_binding.yaml \ No newline at end of file diff --git a/config/rbac/leader_election_role.yaml b/config/rbac/leader_election_role.yaml index f5ea06b5..3985d7c6 100644 --- a/config/rbac/leader_election_role.yaml +++ b/config/rbac/leader_election_role.yaml @@ -2,9 +2,9 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize name: leader-election-role rules: - apiGroups: diff --git a/config/rbac/leader_election_role_binding.yaml b/config/rbac/leader_election_role_binding.yaml index 3217cfea..94e457a7 100644 --- a/config/rbac/leader_election_role_binding.yaml +++ b/config/rbac/leader_election_role_binding.yaml @@ -1,9 +1,9 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize name: leader-election-rolebinding roleRef: apiGroup: rbac.authorization.k8s.io @@ -11,5 +11,5 @@ roleRef: name: leader-election-role subjects: - kind: ServiceAccount - name: controller-manager + name: odh-model-controller namespace: system diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 48ecdab7..66c5a3cb 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -3,13 +3,15 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - name: manager-role + name: odh-model-controller-role rules: - apiGroups: - "" resources: - configmaps - secrets + - serviceaccounts + - services verbs: - create - delete @@ -21,23 +23,74 @@ rules: - 
apiGroups: - "" resources: - - configmaps/finalizers - - secrets/finalizers + - endpoints + - namespaces + - pods verbs: + - create + - get + - list + - patch - update + - watch - apiGroups: - - "" + - authorino.kuadrant.io resources: - - configmaps/status - - secrets/status + - authconfigs verbs: + - create + - delete - get + - list - patch - update + - watch - apiGroups: - - nim.opendatahub.io + - datasciencecluster.opendatahub.io resources: - - accounts + - datascienceclusters + verbs: + - get + - list + - watch +- apiGroups: + - dscinitialization.opendatahub.io + resources: + - dscinitializations + verbs: + - get + - list + - watch +- apiGroups: + - extensions + - networking.k8s.io + resources: + - ingresses + verbs: + - get + - list + - watch +- apiGroups: + - maistra.io + resources: + - servicemeshcontrolplanes + verbs: + - get + - list + - use + - watch +- apiGroups: + - maistra.io + resources: + - servicemeshmemberrolls + verbs: + - get + - list + - watch +- apiGroups: + - maistra.io + resources: + - servicemeshmembers verbs: - create - delete @@ -47,24 +100,111 @@ rules: - update - watch - apiGroups: - - nim.opendatahub.io + - monitoring.coreos.com resources: - - accounts/finalizers + - podmonitors + - servicemonitors + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - networking.istio.io + resources: + - gateways + verbs: + - get + - list + - patch + - update + - watch +- apiGroups: + - networking.istio.io + resources: + - virtualservices + - virtualservices/finalizers verbs: + - create + - delete + - get + - list + - patch - update + - watch +- apiGroups: + - networking.k8s.io + resources: + - networkpolicies + verbs: + - create + - delete + - get + - list + - patch + - update + - watch - apiGroups: - nim.opendatahub.io resources: + - accounts - accounts/status verbs: - get + - list + - update + - watch +- apiGroups: + - nim.opendatahub.io + resources: + - accounts/finalizers + verbs: + - update +- apiGroups: 
+ - rbac.authorization.k8s.io + resources: + - clusterrolebindings + - rolebindings + verbs: + - create + - delete + - get + - list - patch - update + - watch - apiGroups: - - serving.kserve.io + - route.openshift.io resources: - - inferenceservices - - servingruntimes + - routes + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - route.openshift.io + resources: + - routes/custom-host + verbs: + - create +- apiGroups: + - security.istio.io + resources: + - authorizationpolicies + verbs: + - get + - list +- apiGroups: + - security.istio.io + resources: + - peerauthentications verbs: - create - delete @@ -76,16 +216,49 @@ rules: - apiGroups: - serving.kserve.io resources: + - inferenceservices - inferenceservices/finalizers - - servingruntimes/finalizers verbs: + - get + - list - update + - watch - apiGroups: - serving.kserve.io resources: - - inferenceservices/status - - servingruntimes/status + - servingruntimes verbs: + - create - get + - list + - update + - watch +- apiGroups: + - serving.kserve.io + resources: + - servingruntimes/finalizers + verbs: + - update +- apiGroups: + - telemetry.istio.io + resources: + - telemetries + verbs: + - create + - delete + - get + - list - patch - update + - watch +- apiGroups: + - template.openshift.io + resources: + - templates + verbs: + - create + - delete + - get + - list + - update + - watch diff --git a/config/rbac/role_binding.yaml b/config/rbac/role_binding.yaml index e39e8339..a77f0225 100644 --- a/config/rbac/role_binding.yaml +++ b/config/rbac/role_binding.yaml @@ -1,15 +1,15 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding metadata: - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize - name: manager-rolebinding +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize + name: odh-model-controller-rolebinding-opendatahub roleRef: apiGroup: 
rbac.authorization.k8s.io kind: ClusterRole - name: manager-role + name: odh-model-controller-role subjects: - kind: ServiceAccount - name: controller-manager + name: odh-model-controller namespace: system diff --git a/config/rbac/service_account.yaml b/config/rbac/service_account.yaml index 1c5d6f91..88cf88e0 100644 --- a/config/rbac/service_account.yaml +++ b/config/rbac/service_account.yaml @@ -1,8 +1,8 @@ apiVersion: v1 kind: ServiceAccount metadata: - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize - name: controller-manager +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize + name: odh-model-controller namespace: system diff --git a/config/runtimes/caikit-standalone-template.yaml b/config/runtimes/caikit-standalone-template.yaml new file mode 100644 index 00000000..a6ee3cf5 --- /dev/null +++ b/config/runtimes/caikit-standalone-template.yaml @@ -0,0 +1,76 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: Caikit is an AI toolkit that enables users to manage models through a set of developer friendly APIs. It provides a consistent format for creating and using AI models against a wide variety of data domains and tasks. + openshift.io/provider-display-name: Red Hat, Inc. 
+ tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-nlp + template.openshift.io/long-description: This template defines resources needed to deploy caikit-standalone-serving servingruntime with Red Hat Data Science KServe for LLM model + template.openshift.io/support-url: https://access.redhat.com + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' + name: caikit-standalone-serving-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: caikit-standalone-runtime + annotations: + openshift.io/display-name: Caikit Standalone ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '8086' + prometheus.io/path: /metrics + multiModel: false + supportedModelFormats: + - autoSelect: true + name: caikit + containers: + - name: kserve-container + image: $(caikit-standalone-image) + command: + - python + - '-m' + - caikit.runtime + env: + - name: RUNTIME_LOCAL_MODELS_DIR + value: /mnt/models + - name: HF_HOME + value: /tmp/hf_home + - name: RUNTIME_GRPC_ENABLED + value: 'false' + - name: RUNTIME_HTTP_ENABLED + value: 'true' + ports: + - containerPort: 8080 + protocol: TCP + readinessProbe: + exec: + command: + - python + - -m + - caikit_health_probe + - readiness + initialDelaySeconds: 5 + livenessProbe: + exec: + command: + - python + - -m + - caikit_health_probe + - liveness + initialDelaySeconds: 5 + startupProbe: + httpGet: + port: 8080 + path: /health + # Allow 12 mins to start + failureThreshold: 24 + periodSeconds: 30 \ No newline at end of file diff --git a/config/runtimes/caikit-tgis-template.yaml b/config/runtimes/caikit-tgis-template.yaml new file mode 100644 index 00000000..cebc1ebe --- /dev/null +++ b/config/runtimes/caikit-tgis-template.yaml @@ -0,0 +1,58 @@ +apiVersion: 
template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: Caikit is an AI toolkit that enables users to manage models through a set of developer friendly APIs. It provides a consistent format for creating and using AI models against a wide variety of data domains and tasks. + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/caikit-tgis-serving + template.openshift.io/long-description: This template defines resources needed to deploy caikit-tgis-serving servingruntime with Red Hat Data Science KServe for LLM model + template.openshift.io/support-url: https://access.redhat.com + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' + name: caikit-tgis-serving-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: caikit-tgis-runtime + annotations: + openshift.io/display-name: Caikit TGIS ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '3000' + prometheus.io/path: /metrics + multiModel: false + supportedModelFormats: + - autoSelect: true + name: caikit + containers: + - name: kserve-container + image: $(tgis-image) + command: + - text-generation-launcher + args: + - --model-name=/mnt/models/artifacts/ + env: + - name: HF_HOME + value: /tmp/hf_home + - name: transformer-container + image: $(caikit-tgis-image) + env: + - name: RUNTIME_LOCAL_MODELS_DIR + value: /mnt/models + - name: HF_HOME + value: /tmp/hf_home + - name: RUNTIME_GRPC_ENABLED + value: 'false' + - name: RUNTIME_HTTP_ENABLED + value: 'true' + ports: + - containerPort: 8080 + protocol: TCP diff --git a/config/runtimes/kustomization.yaml b/config/runtimes/kustomization.yaml new file 
mode 100644 index 00000000..cb0518eb --- /dev/null +++ b/config/runtimes/kustomization.yaml @@ -0,0 +1,17 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +labels: + - pairs: + app: odh-dashboard + app.kubernetes.io/part-of: odh-dashboard + includeSelectors: true +resources: + - ovms-mm-template.yaml + - caikit-tgis-template.yaml + - tgis-template.yaml + - ovms-kserve-template.yaml + - vllm-template.yaml + - vllm-multinode-template.yaml + - vllm-rocm-template.yaml + - vllm-gaudi-template.yaml + - caikit-standalone-template.yaml \ No newline at end of file diff --git a/config/runtimes/ovms-kserve-template.yaml b/config/runtimes/ovms-kserve-template.yaml new file mode 100644 index 00000000..1f7a486b --- /dev/null +++ b/config/runtimes/ovms-kserve-template.yaml @@ -0,0 +1,64 @@ +kind: Template +apiVersion: template.openshift.io/v1 +metadata: + name: kserve-ovms + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + tags: 'kserve-ovms,servingruntime' + description: 'OpenVino Model Serving Definition' + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + annotations: + openshift.io/display-name: OpenVINO Model Server + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + name: kserve-ovms + labels: + opendatahub.io/dashboard: 'true' + spec: + multiModel: false + annotations: + prometheus.io/port: '8888' + prometheus.io/path: /metrics + supportedModelFormats: + - name: openvino_ir + version: opset13 + autoSelect: true + - name: onnx + version: '1' + - name: tensorflow + version: '1' + autoSelect: true + - name: tensorflow + version: '2' + autoSelect: true + - name: paddle + version: '2' + autoSelect: true + - name: pytorch + version: '2' + autoSelect: true + protocolVersions: + - v2 + - grpc-v2 + containers: + - name: kserve-container + image: $(ovms-image) + args: + - 
'--model_name={{.Name}}' + - '--port=8001' + - '--rest_port=8888' + - '--model_path=/mnt/models' + - '--file_system_poll_wait_seconds=0' + - '--grpc_bind_address=0.0.0.0' + - '--rest_bind_address=0.0.0.0' + - '--target_device=AUTO' + - '--metrics_enable' + ports: + - containerPort: 8888 + protocol: TCP diff --git a/config/runtimes/ovms-mm-template.yaml b/config/runtimes/ovms-mm-template.yaml new file mode 100644 index 00000000..5aa0d3fb --- /dev/null +++ b/config/runtimes/ovms-mm-template.yaml @@ -0,0 +1,65 @@ +kind: Template +apiVersion: template.openshift.io/v1 +metadata: + name: ovms + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + tags: 'ovms,servingruntime' + description: 'OpenVino Model Serving Definition' + opendatahub.io/modelServingSupport: '["multi"]' + opendatahub.io/apiProtocol: 'REST' +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: ovms + annotations: + openshift.io/display-name: 'OpenVINO Model Server' + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + builtInAdapter: + env: + - name: OVMS_FORCE_TARGET_DEVICE + value: AUTO + memBufferBytes: 134217728 + modelLoadingTimeoutMillis: 90000 + runtimeManagementPort: 8888 + serverType: ovms + containers: + - args: + - '--port=8001' + - '--rest_port=8888' + - '--config_path=/models/model_config_list.json' + - '--file_system_poll_wait_seconds=0' + - '--grpc_bind_address=0.0.0.0' + - '--rest_bind_address=0.0.0.0' + image: $(ovms-image) + name: ovms + resources: + limits: + cpu: '0' + memory: 0Gi + requests: + cpu: '0' + memory: 0Gi + grpcDataEndpoint: 'port:8001' + grpcEndpoint: 'port:8085' + multiModel: true + protocolVersions: + - grpc-v1 + replicas: 1 + supportedModelFormats: + - autoSelect: true + name: openvino_ir + version: opset1 + - autoSelect: true + name: onnx + version: '1' + - autoSelect: true + name: tensorflow + version: '2' +parameters: [] 
diff --git a/config/runtimes/tgis-template.yaml b/config/runtimes/tgis-template.yaml new file mode 100644 index 00000000..51bf52c8 --- /dev/null +++ b/config/runtimes/tgis-template.yaml @@ -0,0 +1,49 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: Text Generation Inference Server (TGIS) is a high performance inference engine that deploys and serves Large Language Models. + openshift.io/display-name: TGIS Standalone ServingRuntime for KServe + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/text-generation-inference + template.openshift.io/long-description: This template defines resources needed to deploy TGIS standalone servingruntime with KServe in Red Hat OpenShift AI + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'gRPC' + name: tgis-grpc-serving-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: tgis-grpc-runtime + annotations: + openshift.io/display-name: TGIS Standalone ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '3000' + prometheus.io/path: '/metrics' + multiModel: false + supportedModelFormats: + - autoSelect: true + name: pytorch + containers: + - name: kserve-container + image: $(tgis-image) + command: ['text-generation-launcher'] + args: + - '--model-name=/mnt/models/' + - '--port=3000' + - '--grpc-port=8033' + env: + - name: HF_HOME + value: /tmp/hf_home + ports: + - containerPort: 8033 + name: h2c + protocol: TCP diff --git a/config/runtimes/vllm-gaudi-template.yaml b/config/runtimes/vllm-gaudi-template.yaml new file mode 100644 index 00000000..4579eb0f --- /dev/null +++ 
b/config/runtimes/vllm-gaudi-template.yaml @@ -0,0 +1,53 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: vLLM ServingRuntime to support Gaudi(for Habana AI processors) + openshift.io/display-name: vLLM ServingRuntime with Gaudi accelerators support for KServe + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/vllm + template.openshift.io/long-description: This template defines resources needed to deploy vLLM ServingRuntime with Gaudi accelerators support for KServe in Red Hat OpenShift AI + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' + name: vllm-gaudi-runtime-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: vllm-gaudi-runtime + annotations: + openshift.io/display-name: vLLM ServingRuntime with Gaudi accelerators support for KServe + opendatahub.io/recommended-accelerators: '["habana.ai/gaudi"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '8080' + prometheus.io/path: '/metrics' + multiModel: false + supportedModelFormats: + - autoSelect: false + name: vLLM + builtInAdapter: + modelLoadingTimeoutMillis: 90000 + containers: + - name: kserve-container + image: $(vllm-gaudi-image) + command: + - python + - -m + - vllm.entrypoints.openai.api_server + args: + - "--port=8080" + - "--model=/mnt/models" + - "--served-model-name={{.Name}}" + env: + - name: HF_HOME + value: /tmp/hf_home + ports: + - containerPort: 8080 + protocol: TCP \ No newline at end of file diff --git a/config/runtimes/vllm-multinode-template.yaml b/config/runtimes/vllm-multinode-template.yaml new file mode 100644 index 00000000..ac6d9b71 --- /dev/null +++ b/config/runtimes/vllm-multinode-template.yaml @@ -0,0 +1,279 @@ 
+apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: "false" + opendatahub.io/ootb: "true" + annotations: + description: vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs + openshift.io/display-name: vLLM ServingRuntime Multi-Node for KServe + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime,multi-node + template.openshift.io/documentation-url: https://github.com/opendatahub-io/vllm + template.openshift.io/long-description: This template defines resources needed to deploy vLLM ServingRuntime Multi-Node with KServe in Red Hat OpenShift AI + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: "REST" + name: vllm-multinode-runtime-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: vllm-multinode-runtime + annotations: + openshift.io/display-name: vLLM ServingRuntime Multi-Node for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: "false" + spec: + annotations: + prometheus.io/port: "8080" + prometheus.io/path: "/metrics" + multiModel: false + supportedModelFormats: + - autoSelect: true + name: vLLM + priority: 2 + containers: + - name: kserve-container + image: $(vllm-image) + command: [ "bash", "-c" ] + args: + - | + ray start --head --disable-usage-stats --include-dashboard false + # wait for other node to join + until [[ $(ray status --address ${RAY_ADDRESS} | grep -c node_) -eq ${PIPELINE_PARALLEL_SIZE} ]]; do + echo "Waiting..." 
+ sleep 1 + done + ray status --address ${RAY_ADDRESS} + + export SERVED_MODEL_NAME=${MODEL_NAME} + export MODEL_NAME=${MODEL_DIR} + + exec python3 -m vllm.entrypoints.openai.api_server --port=8080 --distributed-executor-backend ray --model=${MODEL_NAME} --served-model-name=${SERVED_MODEL_NAME} --tensor-parallel-size=${TENSOR_PARALLEL_SIZE} --pipeline-parallel-size=${PIPELINE_PARALLEL_SIZE} --disable_custom_all_reduce + env: + - name: RAY_PORT + value: "6379" + - name: RAY_ADDRESS + value: 127.0.0.1:6379 + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: VLLM_NO_USAGE_STATS + value: "1" + - name: HOME + value: /tmp + - name: HF_HOME + value: /tmp/hf_home + resources: + limits: + cpu: "16" + memory: 48Gi + requests: + cpu: "8" + memory: 24Gi + volumeMounts: + - name: shm + mountPath: /dev/shm + livenessProbe: + failureThreshold: 2 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 15 + exec: + command: + - bash + - -c + - | + # Check if the registered ray nodes count is the same as PIPELINE_PARALLEL_SIZE + gpu_status=$(ray status --address ${RAY_ADDRESS} | grep GPU) + if [[ -z ${gpu_status} ]]; then + echo "Unhealthy - GPU does not exist" + exit 1 + fi + + used_gpu=$(echo "${gpu_status}" | awk '{print $1}' | cut -d'/' -f1) + reserved_gpu=$(echo "${gpu_status}" | awk '{print $1}' | cut -d'/' -f2) + + # Determine health status based on GPU usage + if [[ "${used_gpu}" != "${reserved_gpu}" ]]; then + echo "Unhealthy - Used: ${used_gpu}, Reserved: ${reserved_gpu}" + exit 1 + fi + readinessProbe: + failureThreshold: 2 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 15 + exec: + command: + - bash + - -c + - | + # Check if the registered nodes count matches PIPELINE_PARALLEL_SIZE + registered_node_count=$(ray status --address ${RAY_ADDRESS} | grep -c node_) + if [[ ${registered_node_count} -ne "${PIPELINE_PARALLEL_SIZE}" ]]; then + echo "Unhealthy - Registered nodes count (${registered_node_count}) does not match 
PIPELINE_PARALLEL_SIZE (${PIPELINE_PARALLEL_SIZE})." + exit 1 + fi + + # Check if the registered ray nodes count is the same as PIPELINE_PARALLEL_SIZE + gpu_status=$(ray status --address ${RAY_ADDRESS} | grep GPU) + if [[ -z ${gpu_status} ]]; then + echo "Unhealthy - GPU does not exist" + exit 1 + fi + + used_gpu=$(echo "${gpu_status}" | awk '{print $1}' | cut -d'/' -f1) + reserved_gpu=$(echo "${gpu_status}" | awk '{print $1}' | cut -d'/' -f2) + + # Determine health status based on GPU usage + if [[ "${used_gpu}" != "${reserved_gpu}" ]]; then + echo "Unhealthy - Used: ${used_gpu}, Reserved: ${reserved_gpu}" + exit 1 + fi + + # Check model health + health_check=$(curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/health) + if [[ ${health_check} != 200 ]]; then + echo "Unhealthy - vLLM Runtime Health Check failed." + exit 1 + fi + startupProbe: + failureThreshold: 40 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 30 + initialDelaySeconds: 20 + exec: + command: + - bash + - -c + - | + # This need when head node have issues and restarted. + # It will wait for new worker node. + registered_node_count=$(ray status --address ${RAY_ADDRESS} | grep -c node_) + if [[ ${registered_node_count} -ne "${PIPELINE_PARALLEL_SIZE}" ]]; then + echo "Unhealthy - Registered nodes count (${registered_node_count}) does not match PIPELINE_PARALLEL_SIZE (${PIPELINE_PARALLEL_SIZE})." + exit 1 + fi + + # Double check to make sure Model is ready to serve. + for i in 1 2; do + # Check model health + health_check=$(curl -o /dev/null -s -w "%{http_code}\n" http://localhost:8080/health) + if [[ ${health_check} != 200 ]]; then + echo "Unhealthy - vLLM Runtime Health Check failed." 
+ exit 1 + fi + done + ports: + - containerPort: 8080 + name: http + protocol: TCP + volumes: + - name: shm + emptyDir: + medium: Memory + sizeLimit: 12Gi + workerSpec: + pipelineParallelSize: 2 + tensorParallelSize: 1 + containers: + - name: worker-container + image: $(vllm-image) + command: [ "bash", "-c" ] + args: + - | + SECONDS=0 + + while true; do + if (( SECONDS <= 240 )); then + if ray health-check --address "${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:6379" > /dev/null 2>&1; then + echo "Global Control Service(GCS) is ready." + break + fi + echo "$SECONDS seconds elapsed: Waiting for Global Control Service(GCS) to be ready." + else + if ray health-check --address "${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:6379"; then + echo "Global Control Service(GCS) is ready. Any error messages above can be safely ignored." + break + fi + echo "$SECONDS seconds elapsed: Still waiting for Global Control Service(GCS) to be ready." + echo "For troubleshooting, refer to the FAQ at https://docs.ray.io/en/master/cluster/kubernetes/troubleshooting/troubleshooting.html#kuberay-troubleshootin-guides" + fi + + sleep 5 + done + + export RAY_HEAD_ADDRESS="${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:6379" + echo "Attempting to connect to Ray cluster at $RAY_HEAD_ADDRESS ..." 
+ ray start --address="${RAY_HEAD_ADDRESS}" --block + env: + - name: POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + resources: + limits: + cpu: "16" + memory: 48Gi + requests: + cpu: "8" + memory: 24Gi + volumeMounts: + - name: shm + mountPath: /dev/shm + livenessProbe: + failureThreshold: 2 + periodSeconds: 5 + successThreshold: 1 + timeoutSeconds: 15 + exec: + command: + - bash + - -c + - | + # Check if the registered nodes count matches PIPELINE_PARALLEL_SIZE + registered_node_count=$(ray status --address ${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:6379 | grep -c node_) + if [[ ${registered_node_count} -ne "${PIPELINE_PARALLEL_SIZE}" ]]; then + echo "Unhealthy - Registered nodes count (${registered_node_count}) does not match PIPELINE_PARALLEL_SIZE (${PIPELINE_PARALLEL_SIZE})." + exit 1 + fi + startupProbe: + failureThreshold: 40 + periodSeconds: 30 + successThreshold: 1 + timeoutSeconds: 30 + initialDelaySeconds: 20 + exec: + command: + - /bin/sh + - -c + - | + registered_node_count=$(ray status --address ${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:6379 | grep -c node_) + if [[ ${registered_node_count} -ne "${PIPELINE_PARALLEL_SIZE}" ]]; then + echo "Unhealthy - Registered nodes count (${registered_node_count}) does not match PIPELINE_PARALLEL_SIZE (${PIPELINE_PARALLEL_SIZE})." + exit 1 + fi + + # Double check to make sure Model is ready to serve. + for i in 1 2; do + # Check model health + model_health_check=$(curl -s ${HEAD_SVC}.${POD_NAMESPACE}.svc.cluster.local:8080/v1/models|grep -o ${ISVC_NAME}) + if [[ ${model_health_check} != "${ISVC_NAME}" ]]; then + echo "Unhealthy - vLLM Runtime Health Check failed." 
+ exit 1 + fi + sleep 10 + done + volumes: + - name: shm + emptyDir: + medium: Memory + sizeLimit: 12Gi diff --git a/config/runtimes/vllm-rocm-template.yaml b/config/runtimes/vllm-rocm-template.yaml new file mode 100644 index 00000000..634e5966 --- /dev/null +++ b/config/runtimes/vllm-rocm-template.yaml @@ -0,0 +1,51 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: vLLM ServingRuntime to support ROCm (for AMD GPUs) + openshift.io/display-name: vLLM ROCm ServingRuntime for KServe + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/vllm + template.openshift.io/long-description: This template defines resources needed to deploy vLLM ServingRuntime with KServe in Red Hat OpenShift AI + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' + name: vllm-rocm-runtime-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: vllm-rocm-runtime + annotations: + openshift.io/display-name: vLLM ROCm ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["amd.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '8080' + prometheus.io/path: '/metrics' + multiModel: false + supportedModelFormats: + - autoSelect: true + name: vLLM + containers: + - name: kserve-container + image: $(vllm-rocm-image) + command: + - python + - -m + - vllm.entrypoints.openai.api_server + args: + - "--port=8080" + - "--model=/mnt/models" + - "--served-model-name={{.Name}}" + env: + - name: HF_HOME + value: /tmp/hf_home + ports: + - containerPort: 8080 + protocol: TCP diff --git a/config/runtimes/vllm-template.yaml b/config/runtimes/vllm-template.yaml new file mode 100644 index 00000000..e0ff4653 --- /dev/null +++ 
b/config/runtimes/vllm-template.yaml @@ -0,0 +1,51 @@ +apiVersion: template.openshift.io/v1 +kind: Template +metadata: + labels: + opendatahub.io/dashboard: 'true' + opendatahub.io/ootb: 'true' + annotations: + description: vLLM is a high-throughput and memory-efficient inference and serving engine for LLMs + openshift.io/display-name: vLLM ServingRuntime for KServe + openshift.io/provider-display-name: Red Hat, Inc. + tags: rhods,rhoai,kserve,servingruntime + template.openshift.io/documentation-url: https://github.com/opendatahub-io/vllm + template.openshift.io/long-description: This template defines resources needed to deploy vLLM ServingRuntime with KServe in Red Hat OpenShift AI + opendatahub.io/modelServingSupport: '["single"]' + opendatahub.io/apiProtocol: 'REST' + name: vllm-runtime-template +objects: + - apiVersion: serving.kserve.io/v1alpha1 + kind: ServingRuntime + metadata: + name: vllm-runtime + annotations: + openshift.io/display-name: vLLM ServingRuntime for KServe + opendatahub.io/recommended-accelerators: '["nvidia.com/gpu"]' + labels: + opendatahub.io/dashboard: 'true' + spec: + annotations: + prometheus.io/port: '8080' + prometheus.io/path: '/metrics' + multiModel: false + supportedModelFormats: + - autoSelect: true + name: vLLM + containers: + - name: kserve-container + image: $(vllm-image) + command: + - python + - -m + - vllm.entrypoints.openai.api_server + args: + - "--port=8080" + - "--model=/mnt/models" + - "--served-model-name={{.Name}}" + env: + - name: HF_HOME + value: /tmp/hf_home + ports: + - containerPort: 8080 + protocol: TCP diff --git a/config/webhook/kustomization.yaml b/config/webhook/kustomization.yaml index 9cf26134..5f5d3f74 100644 --- a/config/webhook/kustomization.yaml +++ b/config/webhook/kustomization.yaml @@ -2,5 +2,45 @@ resources: - manifests.yaml - service.yaml +patches: +- patch: |- + apiVersion: admissionregistration.k8s.io/v1 + kind: ValidatingWebhookConfiguration + metadata: + name: 
validating.odh-model-controller.opendatahub.io + annotations: + service.beta.openshift.io/inject-cabundle: "true" + webhooks: + - name: validating.ksvc.odh-model-controller.opendatahub.io + clientConfig: + service: + name: odh-model-controller-webhook-service + objectSelector: + matchExpressions: + - key: serving.kserve.io/inferenceservice + operator: Exists + - name: validating.nim.account.odh-model-controller.opendatahub.io + clientConfig: + service: + name: odh-model-controller-webhook-service + - name: validating.isvc.odh-model-controller.opendatahub.io + clientConfig: + service: + name: odh-model-controller-webhook-service + target: + group: admissionregistration.k8s.io + kind: ValidatingWebhookConfiguration + name: validating-webhook-configuration + version: v1 +- patch: |- + - op: replace + path: /metadata/name + value: validating.odh-model-controller.opendatahub.io + target: + group: admissionregistration.k8s.io + kind: ValidatingWebhookConfiguration + name: validating-webhook-configuration + version: v1 + configurations: - kustomizeconfig.yaml diff --git a/config/webhook/manifests.yaml b/config/webhook/manifests.yaml index 82b62c8f..6bf3b9f5 100644 --- a/config/webhook/manifests.yaml +++ b/config/webhook/manifests.yaml @@ -51,7 +51,7 @@ webhooks: namespace: system path: /validate-serving-kserve-io-v1beta1-inferenceservice failurePolicy: Fail - name: vinferenceservice-v1beta1.kb.io + name: validating.isvc.odh-model-controller.opendatahub.io rules: - apiGroups: - serving.kserve.io diff --git a/config/webhook/service.yaml b/config/webhook/service.yaml index c334b323..d93e1b4f 100644 --- a/config/webhook/service.yaml +++ b/config/webhook/service.yaml @@ -1,10 +1,12 @@ apiVersion: v1 kind: Service metadata: - labels: - app.kubernetes.io/name: odh-model-controller - app.kubernetes.io/managed-by: kustomize - name: webhook-service +# labels: +# app.kubernetes.io/name: odh-model-controller +# app.kubernetes.io/managed-by: kustomize + annotations: + 
service.beta.openshift.io/serving-cert-secret-name: odh-model-controller-webhook-cert + name: odh-model-controller-webhook-service namespace: system spec: ports: @@ -12,4 +14,4 @@ spec: protocol: TCP targetPort: 9443 selector: - control-plane: controller-manager + control-plane: odh-model-controller diff --git a/internal/controller/core/configmap_controller.go b/internal/controller/core/configmap_controller.go index 356a7a33..7c87152a 100644 --- a/internal/controller/core/configmap_controller.go +++ b/internal/controller/core/configmap_controller.go @@ -50,8 +50,6 @@ type ConfigMapReconciler struct { } // +kubebuilder:rbac:groups=core,resources=configmaps,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=configmaps/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=core,resources=configmaps/finalizers,verbs=update // reconcileConfigMap watch odh global ca cert and it will create/update/delete kserve custom cert configmap func (r *ConfigMapReconciler) reconcileConfigMap(configmap *corev1.ConfigMap, ctx context.Context, log logr.Logger) error { diff --git a/internal/controller/core/secret_controller.go b/internal/controller/core/secret_controller.go index 60f7c037..79007805 100644 --- a/internal/controller/core/secret_controller.go +++ b/internal/controller/core/secret_controller.go @@ -49,8 +49,6 @@ type SecretReconciler struct { } // +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=core,resources=secrets/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=core,resources=secrets/finalizers,verbs=update // newStorageSecret takes a list of data connection secrets and generates a single storage config secret // https://github.com/kserve/modelmesh-serving/blob/main/docs/predictors/setup-storage.md diff --git a/internal/controller/nim/account_controller.go b/internal/controller/nim/account_controller.go index 7001b073..e3048a05 100644 --- 
a/internal/controller/nim/account_controller.go +++ b/internal/controller/nim/account_controller.go @@ -61,9 +61,10 @@ var ( labels = map[string]string{"opendatahub.io/managed": "true"} ) -// +kubebuilder:rbac:groups=nim.opendatahub.io,resources=accounts,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=nim.opendatahub.io,resources=accounts/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=nim.opendatahub.io,resources=accounts,verbs=get;list;watch;update +// +kubebuilder:rbac:groups=nim.opendatahub.io,resources=accounts/status,verbs=get;list;watch;update // +kubebuilder:rbac:groups=nim.opendatahub.io,resources=accounts/finalizers,verbs=update +// +kubebuilder:rbac:groups=template.openshift.io,resources=templates,verbs=get;list;watch;create;update;delete func (r *AccountReconciler) SetupWithManager(mgr ctrl.Manager, ctx context.Context) error { // TODO: Copied from original main.go... Should it be FromContext? diff --git a/internal/controller/serving/inferenceservice_controller.go b/internal/controller/serving/inferenceservice_controller.go index d8ecce52..e9d7b3d1 100644 --- a/internal/controller/serving/inferenceservice_controller.go +++ b/internal/controller/serving/inferenceservice_controller.go @@ -80,9 +80,30 @@ func NewInferenceServiceReconciler(setupLog logr.Logger, client client.Client, s return isvcReconciler } -// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices/status,verbs=get;update;patch -// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices/finalizers,verbs=update +// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices,verbs=get;list;watch;update +// +kubebuilder:rbac:groups=serving.kserve.io,resources=inferenceservices/finalizers,verbs=get;list;watch;update + +// 
+kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.istio.io,resources=virtualservices/finalizers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.istio.io,resources=gateways,verbs=get;list;watch;update;patch +// +kubebuilder:rbac:groups=security.istio.io,resources=peerauthentications,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=security.istio.io,resources=authorizationpolicies,verbs=get;list +// +kubebuilder:rbac:groups=telemetry.istio.io,resources=telemetries,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=maistra.io,resources=servicemeshmembers,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=maistra.io,resources=servicemeshmemberrolls,verbs=get;list;watch +// +kubebuilder:rbac:groups=maistra.io,resources=servicemeshcontrolplanes,verbs=get;list;watch;use +// +kubebuilder:rbac:groups=route.openshift.io,resources=routes,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=route.openshift.io,resources=routes/custom-host,verbs=create +// +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings;rolebindings,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.k8s.io,resources=networkpolicies,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;podmonitors,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=extensions,resources=ingresses,verbs=get;list;watch +// +kubebuilder:rbac:groups="",resources=namespaces;pods;endpoints,verbs=get;list;watch;create;update;patch +// 
+kubebuilder:rbac:groups="",resources=secrets;configmaps;serviceaccounts;services,verbs=get;list;watch;create;update;delete;patch +// +kubebuilder:rbac:groups=authorino.kuadrant.io,resources=authconfigs,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=datasciencecluster.opendatahub.io,resources=datascienceclusters,verbs=get;list;watch +// +kubebuilder:rbac:groups=dscinitialization.opendatahub.io,resources=dscinitializations,verbs=get;list;watch // Reconcile performs the reconciling of the Openshift objects for a Kubeflow // InferenceService. diff --git a/internal/controller/serving/servingruntime_controller.go b/internal/controller/serving/servingruntime_controller.go index 0eae3a3c..c67ff106 100644 --- a/internal/controller/serving/servingruntime_controller.go +++ b/internal/controller/serving/servingruntime_controller.go @@ -51,8 +51,7 @@ type ServingRuntimeReconciler struct { MonitoringNS string } -// +kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes,verbs=get;list;watch;create;update;patch;delete -// +kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes,verbs=get;list;watch;create;update // +kubebuilder:rbac:groups=serving.kserve.io,resources=servingruntimes/finalizers,verbs=update // RoleBindingsAreEqual checks if RoleBinding are equal, if not return false diff --git a/internal/webhook/serving/v1beta1/inferenceservice_webhook.go b/internal/webhook/serving/v1beta1/inferenceservice_webhook.go index 80825ecf..7ff082ce 100644 --- a/internal/webhook/serving/v1beta1/inferenceservice_webhook.go +++ b/internal/webhook/serving/v1beta1/inferenceservice_webhook.go @@ -47,7 +47,7 @@ func SetupInferenceServiceWebhookWithManager(mgr ctrl.Manager) error { // NOTE: The 'path' attribute must follow a specific pattern and should not be modified directly here. 
// Modifying the path for an invalid path can cause API server errors; failing to locate the webhook. -// +kubebuilder:webhook:path=/validate-serving-kserve-io-v1beta1-inferenceservice,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=inferenceservices,verbs=create,versions=v1beta1,name=vinferenceservice-v1beta1.kb.io,admissionReviewVersions=v1 +// +kubebuilder:webhook:path=/validate-serving-kserve-io-v1beta1-inferenceservice,mutating=false,failurePolicy=fail,sideEffects=None,groups=serving.kserve.io,resources=inferenceservices,verbs=create,versions=v1beta1,name=validating.isvc.odh-model-controller.opendatahub.io,admissionReviewVersions=v1 // InferenceServiceCustomValidator struct is responsible for validating the InferenceService resource // when it is created, updated, or deleted.