From 7a6eab5b67e5bd7d4ca4ced64358e2dd42054e0e Mon Sep 17 00:00:00 2001 From: Cory Latschkowski Date: Wed, 18 Oct 2023 23:42:20 -0500 Subject: [PATCH] fix: all rhods --- .../rhods-model-instances/kustomization.yaml | 8 +++ .../openvino}/inference.yaml | 4 +- .../openvino}/kustomization.yaml | 2 + .../openvino}/runtime.yaml | 6 +-- .../triton/inference.yaml | 21 ++++++++ .../triton/kustomization.yaml | 8 +++ .../rhods-model-instances/triton/runtime.yaml | 54 +++++++++++++++++++ .../NOTES.md | 0 .../kustomization.yaml | 2 + .../triton/kustomization.yaml | 2 +- .../triton/runtime-template.yaml} | 12 ++--- demos/rhods/kustomization.yaml | 3 +- 12 files changed, 108 insertions(+), 14 deletions(-) create mode 100644 components/configs/kustomized/rhods-model-instances/kustomization.yaml rename components/configs/kustomized/{model-server => rhods-model-instances/openvino}/inference.yaml (90%) rename components/configs/kustomized/{model-server => rhods-model-instances/openvino}/kustomization.yaml (78%) rename components/configs/kustomized/{model-server => rhods-model-instances/openvino}/runtime.yaml (94%) create mode 100644 components/configs/kustomized/rhods-model-instances/triton/inference.yaml create mode 100644 components/configs/kustomized/rhods-model-instances/triton/kustomization.yaml create mode 100644 components/configs/kustomized/rhods-model-instances/triton/runtime.yaml rename components/configs/kustomized/{model-serving-runtime => rhods-model-runtimes}/NOTES.md (100%) rename components/configs/kustomized/{model-serving-runtime => rhods-model-runtimes}/kustomization.yaml (70%) rename components/configs/kustomized/{model-serving-runtime => rhods-model-runtimes}/triton/kustomization.yaml (76%) rename components/configs/kustomized/{model-serving-runtime/triton/serving-runtime.yaml => rhods-model-runtimes/triton/runtime-template.yaml} (92%) diff --git a/components/configs/kustomized/rhods-model-instances/kustomization.yaml 
b/components/configs/kustomized/rhods-model-instances/kustomization.yaml new file mode 100644 index 00000000..b837fb45 --- /dev/null +++ b/components/configs/kustomized/rhods-model-instances/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: ds-group-project + +resources: +- openvino +- triton diff --git a/components/configs/kustomized/model-server/inference.yaml b/components/configs/kustomized/rhods-model-instances/openvino/inference.yaml similarity index 90% rename from components/configs/kustomized/model-server/inference.yaml rename to components/configs/kustomized/rhods-model-instances/openvino/inference.yaml index 3eeefb0e..02cf13bd 100644 --- a/components/configs/kustomized/model-server/inference.yaml +++ b/components/configs/kustomized/rhods-model-instances/openvino/inference.yaml @@ -8,14 +8,14 @@ metadata: labels: name: stocks opendatahub.io/dashboard: "true" - name: stocks + name: openvino-stocks spec: predictor: model: modelFormat: name: onnx version: "1" - runtime: stocks + runtime: vino storage: key: minio-connection path: stocks.onnx \ No newline at end of file diff --git a/components/configs/kustomized/model-server/kustomization.yaml b/components/configs/kustomized/rhods-model-instances/openvino/kustomization.yaml similarity index 78% rename from components/configs/kustomized/model-server/kustomization.yaml rename to components/configs/kustomized/rhods-model-instances/openvino/kustomization.yaml index 58c9c20d..0dc50166 100644 --- a/components/configs/kustomized/model-server/kustomization.yaml +++ b/components/configs/kustomized/rhods-model-instances/openvino/kustomization.yaml @@ -1,6 +1,8 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization +namespace: ds-group-project + resources: - inference.yaml - runtime.yaml diff --git a/components/configs/kustomized/model-server/runtime.yaml b/components/configs/kustomized/rhods-model-instances/openvino/runtime.yaml similarity index 
94% rename from components/configs/kustomized/model-server/runtime.yaml rename to components/configs/kustomized/rhods-model-instances/openvino/runtime.yaml index 9152d92d..07a89ef5 100644 --- a/components/configs/kustomized/model-server/runtime.yaml +++ b/components/configs/kustomized/rhods-model-instances/openvino/runtime.yaml @@ -8,11 +8,11 @@ metadata: opendatahub.io/disable-gpu: "true" opendatahub.io/template-display-name: OpenVINO Model Server opendatahub.io/template-name: ovms - openshift.io/display-name: stocks + openshift.io/display-name: vino labels: - name: stocks + name: vino opendatahub.io/dashboard: "true" - name: stocks + name: vino spec: builtInAdapter: memBufferBytes: 134217728 diff --git a/components/configs/kustomized/rhods-model-instances/triton/inference.yaml b/components/configs/kustomized/rhods-model-instances/triton/inference.yaml new file mode 100644 index 00000000..6c7bd605 --- /dev/null +++ b/components/configs/kustomized/rhods-model-instances/triton/inference.yaml @@ -0,0 +1,21 @@ +--- +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + annotations: + openshift.io/display-name: stocks + serving.kserve.io/deploymentMode: ModelMesh + labels: + name: stocks + opendatahub.io/dashboard: "true" + name: triton-stocks +spec: + predictor: + model: + modelFormat: + name: tensorflow + version: "1" + runtime: triton + storage: + key: minio-connection + path: stocks.onnx \ No newline at end of file diff --git a/components/configs/kustomized/rhods-model-instances/triton/kustomization.yaml b/components/configs/kustomized/rhods-model-instances/triton/kustomization.yaml new file mode 100644 index 00000000..0dc50166 --- /dev/null +++ b/components/configs/kustomized/rhods-model-instances/triton/kustomization.yaml @@ -0,0 +1,8 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization + +namespace: ds-group-project + +resources: +- inference.yaml +- runtime.yaml diff --git 
a/components/configs/kustomized/rhods-model-instances/triton/runtime.yaml b/components/configs/kustomized/rhods-model-instances/triton/runtime.yaml new file mode 100644 index 00000000..89c09ba4 --- /dev/null +++ b/components/configs/kustomized/rhods-model-instances/triton/runtime.yaml @@ -0,0 +1,54 @@ +--- +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + annotations: + enable-auth: "false" + enable-route: "false" + opendatahub.io/disable-gpu: "true" + opendatahub.io/template-display-name: OpenVINO Model Server + opendatahub.io/template-name: triton + openshift.io/display-name: triton + labels: + name: triton + opendatahub.io/dashboard: "true" + name: triton +spec: + builtInAdapter: + memBufferBytes: 134217728 + modelLoadingTimeoutMillis: 90000 + runtimeManagementPort: 8888 + serverType: ovms + containers: + - args: + - --port=8001 + - --rest_port=8888 + - --config_path=/models/model_config_list.json + - --file_system_poll_wait_seconds=0 + - --grpc_bind_address=127.0.0.1 + - --rest_bind_address=127.0.0.1 + image: quay.io/opendatahub/openvino_model_server@sha256:20dbfbaf53d1afbd47c612d953984238cb0e207972ed544a5ea662c2404f276d + name: ovms + resources: + limits: + cpu: "2" + memory: 8Gi + requests: + cpu: "1" + memory: 4Gi + grpcDataEndpoint: port:8001 + grpcEndpoint: port:8085 + multiModel: true + protocolVersions: + - grpc-v1 + replicas: 1 + supportedModelFormats: + - autoSelect: true + name: openvino_ir + version: opset1 + - autoSelect: true + name: onnx + version: "1" + - autoSelect: true + name: tensorflow + version: "2" diff --git a/components/configs/kustomized/model-serving-runtime/NOTES.md b/components/configs/kustomized/rhods-model-runtimes/NOTES.md similarity index 100% rename from components/configs/kustomized/model-serving-runtime/NOTES.md rename to components/configs/kustomized/rhods-model-runtimes/NOTES.md diff --git a/components/configs/kustomized/model-serving-runtime/kustomization.yaml 
b/components/configs/kustomized/rhods-model-runtimes/kustomization.yaml similarity index 70% rename from components/configs/kustomized/model-serving-runtime/kustomization.yaml rename to components/configs/kustomized/rhods-model-runtimes/kustomization.yaml index b343c809..3a80eadb 100644 --- a/components/configs/kustomized/model-serving-runtime/kustomization.yaml +++ b/components/configs/kustomized/rhods-model-runtimes/kustomization.yaml @@ -1,5 +1,7 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization +namespace: redhat-ods-applications + resources: - triton diff --git a/components/configs/kustomized/model-serving-runtime/triton/kustomization.yaml b/components/configs/kustomized/rhods-model-runtimes/triton/kustomization.yaml similarity index 76% rename from components/configs/kustomized/model-serving-runtime/triton/kustomization.yaml rename to components/configs/kustomized/rhods-model-runtimes/triton/kustomization.yaml index ed539a59..644134a0 100644 --- a/components/configs/kustomized/model-serving-runtime/triton/kustomization.yaml +++ b/components/configs/kustomized/rhods-model-runtimes/triton/kustomization.yaml @@ -2,4 +2,4 @@ apiVersion: kustomize.config.k8s.io/v1beta1 kind: Kustomization resources: -- serving-runtime.yaml +- runtime-template.yaml diff --git a/components/configs/kustomized/model-serving-runtime/triton/serving-runtime.yaml b/components/configs/kustomized/rhods-model-runtimes/triton/runtime-template.yaml similarity index 92% rename from components/configs/kustomized/model-serving-runtime/triton/serving-runtime.yaml rename to components/configs/kustomized/rhods-model-runtimes/triton/runtime-template.yaml index 1d8a8f37..59723c37 100644 --- a/components/configs/kustomized/model-serving-runtime/triton/serving-runtime.yaml +++ b/components/configs/kustomized/rhods-model-runtimes/triton/runtime-template.yaml @@ -7,11 +7,10 @@ metadata: argocd.argoproj.io/sync-wave: "2" description: Nvidia Triton Inference Server Runtime Definition 
labels: - opendatahub.io/configurable: "true" - opendatahub.io/dashboard: "true" - # opendatahub.io/ootb: "true" + opendatahub.io/configurable: "true" + opendatahub.io/dashboard: "true" + # opendatahub.io/ootb: "true" name: triton - namespace: redhat-ods-applications objects: - apiVersion: serving.kserve.io/v1alpha1 kind: ServingRuntime @@ -73,7 +72,7 @@ objects: --strict-readiness=false \ --allow-http=true \ --allow-sagemaker=false - + volumeMounts: - name: shm mountPath: /dev/shm @@ -99,8 +98,7 @@ objects: --fail \ --silent \ --show-error \ - --max-time \ - "9" \ + --max-time "8" \ http://localhost:8000/v2/health/live initialDelaySeconds: 5 periodSeconds: 30 diff --git a/demos/rhods/kustomization.yaml b/demos/rhods/kustomization.yaml index b0d6a6a6..24706eac 100644 --- a/demos/rhods/kustomization.yaml +++ b/demos/rhods/kustomization.yaml @@ -7,8 +7,9 @@ resources: - ../../components/operators/openshift-pipelines-operator-rh/operator/overlays/latest - ../../components/configs/kustomized/rhods - ../../components/configs/kustomized/rhods-projects + - ../../components/configs/kustomized/rhods-model-runtimes + # - ../../components/configs/kustomized/rhods-model-instances - ../../components/configs/kustomized/custom-notebook-images/overlays/rhods - - ../../components/configs/kustomized/model-serving-runtime - ../../components/configs/kustomized/minio patches: