From a2c93f053388944cf61f69520dc1a51711245377 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn
Date: Sun, 22 Oct 2023 19:02:58 +0200
Subject: [PATCH 1/5] add runtime version, resource requests and resource limits

---
 components/kserve/README.md      |  4 +++-
 components/kserve/component.yaml | 40 ++++++++++++++++++--------------
 2 files changed, 26 insertions(+), 18 deletions(-)

diff --git a/components/kserve/README.md b/components/kserve/README.md
index 66f0e59b9f0..c6a42842efe 100644
--- a/components/kserve/README.md
+++ b/components/kserve/README.md
@@ -39,6 +39,9 @@ kserve_op = components.load_component_from_url('https://raw.githubusercontent.co
 | canary_traffic_percent | `100` | The traffic split percentage between the candidate model and the last ready model |
 | namespace | | Kubernetes namespace where the KServe service is deployed. If no namespace is provided, `anonymous` will be used unless a namespace is provided in the `inferenceservice_yaml` argument. |
 | framework | | Machine learning framework for model serving. Currently the supported frameworks are `tensorflow`, `pytorch`, `sklearn`, `xgboost`, `onnx`, `triton`, `pmml`, and `lightgbm`. |
+| runtime_version | `latest` | Runtime version of the machine learning framework used for serving |
+| resource_requests | `{"cpu": "0.5", "memory": "512Mi"}` | CPU and memory requests for the model server |
+| resource_limits | `{"cpu": "1", "memory": "1Gi"}` | CPU and memory limits for the model server |
 | custom_model_spec | `{}` | Custom model runtime container spec in JSON. Sample spec: `{"image": "codait/max-object-detector", "port":5000, "name": "test-container"}` |
 | inferenceservice_yaml | `{}` | Raw InferenceService serialized YAML for deployment. Use this if you need additional configurations for your InferenceService. |
 | autoscaling_target | `0` | Autoscaling Target Number. If not 0, sets the following annotation on the InferenceService: `autoscaling.knative.dev/target` |
@@ -185,4 +188,3 @@ kserve_op(
     inferenceservice_yaml=isvc_yaml
 )
 ```
-

diff --git a/components/kserve/component.yaml b/components/kserve/component.yaml
index 4bdcaac7b56..9d7b97e3e23 100644
--- a/components/kserve/component.yaml
+++ b/components/kserve/component.yaml
@@ -1,25 +1,28 @@
 name: Serve a model with KServe
 description: Serve Models using KServe
 inputs:
-  - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'}
-  - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'}
-  - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'}
-  - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'}
-  - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'}
-  - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'}
-  - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'}
-  - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'}
-  - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'}
-  - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'}
-  - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'}
-  - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."}
-  - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'}
-  - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'}
-  - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."}
-  - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"}
+  - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'}
+  - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'}
+  - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'}
+  - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'}
+  - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'}
+  - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'}
+  - {name: Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'}
+  - {name: Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for Model Serving'}
+  - {name: Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for Model Serving'}
+  - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'}
+  - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'}
+  - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'}
+  - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'}
+  - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'}
+  - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."}
+  - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'}
+  - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'}
+  - {name: Request Timeout, type: String, default: '60', description: "Specifies the number of seconds to wait before timing out a request to the component."}
+  - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"}
 outputs:
-  - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'}
+  - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'}
 implementation:
   container:
     image: quay.io/aipipeline/kserve-component:v0.11.1
@@ -32,6 +35,9 @@ implementation:
       --canary-traffic-percent, {inputValue: Canary Traffic Percent},
       --namespace, {inputValue: Namespace},
       --framework, {inputValue: Framework},
+      --runtime-version, {inputValue: Runtime Version},
+      --resource-requests, {inputValue: Resource Requests},
+      --resource-limits, {inputValue: Resource Limits},
      --custom-model-spec, {inputValue: Custom Model Spec},
       --autoscaling-target, {inputValue: Autoscaling Target},
       --service-account, {inputValue: Service Account},

From d009b71ad5dab244bd1a404c9b888969ce28efc3 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn
Date: Sun, 22 Oct 2023 19:03:19 +0200
Subject: [PATCH 2/5] adjust kservedeployer

---
 components/kserve/src/kservedeployer.py | 52 ++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py
index db84e41727e..54e218e8278 100644
--- a/components/kserve/src/kservedeployer.py
+++ b/components/kserve/src/kservedeployer.py
@@ -21,6 +21,7 @@
 import yaml

 from kubernetes import client
+from kubernetes import V1ResourceRequirements
 from kserve import constants
 from kserve import KServeClient
@@ -50,8 +51,9 @@
 }


-def create_predictor_spec(framework, storage_uri, canary_traffic_percent,
-                          service_account, min_replicas, max_replicas, containers, request_timeout):
+def create_predictor_spec(framework, runtime_version, resource_requests, resource_limits,
+                          storage_uri, canary_traffic_percent, service_account, min_replicas,
+                          max_replicas, containers, request_timeout):
     """
     Create and return V1beta1PredictorSpec to be used in a
     V1beta1InferenceServiceSpec object.
@@ -81,7 +83,14 @@ def create_predictor_spec(framework, storage_uri, canary_traffic_percent,

     setattr(
         predictor_spec,
         framework,
-        AVAILABLE_FRAMEWORKS[framework](storage_uri=storage_uri)
+        AVAILABLE_FRAMEWORKS[framework](
+            storage_uri=storage_uri,
+            resources=V1ResourceRequirements(
+                requests=resource_requests,
+                limits=resource_limits
+            ),
+            runtime_version=runtime_version
+        )
     )
     return predictor_spec
@@ -178,10 +187,10 @@ def submit_api_request(kserve_client, action, name, isvc, namespace=None,
     return outputs


-def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace,
-                   framework, custom_model_spec, service_account, inferenceservice_yaml,
-                   request_timeout, autoscaling_target=0, enable_istio_sidecar=True,
-                   watch_timeout=300, min_replicas=0, max_replicas=0):
+def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, framework,
+                   runtime_version, resource_requests, resource_limits, custom_model_spec,
+                   service_account, inferenceservice_yaml, request_timeout, autoscaling_target=0,
+                   enable_istio_sidecar=True, watch_timeout=300, min_replicas=0, max_replicas=0):
     """
     Perform the specified action. If the action is not 'delete' and `inferenceService_yaml`
     was provided, the dict representation of the YAML will be sent directly to the
@@ -224,8 +233,9 @@ def perform_action(action, model_name, model_uri, canary_traffic_percent, namesp

     # Build the V1beta1PredictorSpec.
     predictor_spec = create_predictor_spec(
-        framework, model_uri, canary_traffic_percent, service_account,
-        min_replicas, max_replicas, containers, request_timeout
+        framework, runtime_version, resource_requests, resource_limits,
+        model_uri, canary_traffic_percent, service_account, min_replicas,
+        max_replicas, containers, request_timeout
     )
     isvc = create_inference_service(metadata, predictor_spec)
@@ -287,6 +297,24 @@ def main():
         str(list(AVAILABLE_FRAMEWORKS.keys())),
         default=""
     )
+    parser.add_argument(
+        "--runtime-version",
+        type=str,
+        help="Runtime Version of Machine Learning Framework",
+        default="latest"
+    )
+    parser.add_argument(
+        "--resource-requests",
+        type=json.loads,
+        help="CPU and Memory requests for Model Serving",
+        default="{'cpu': '0.5', 'memory': '512Mi'}",
+    )
+    parser.add_argument(
+        "--resource-limits",
+        type=json.loads,
+        help="CPU and Memory limits for Model Serving",
+        default="{'cpu': '1', 'memory': '1Gi'}",
+    )
     parser.add_argument(
         "--custom-model-spec",
         type=json.loads,
@@ -342,6 +370,9 @@ def main():
     canary_traffic_percent = int(args.canary_traffic_percent)
     namespace = args.namespace
     framework = args.framework.lower()
+    runtime_version = args.runtime_version.lower()
+    resource_requests = args.resource_requests
+    resource_limits = args.resource_limits
     output_path = args.output_path
     custom_model_spec = args.custom_model_spec
     autoscaling_target = int(args.autoscaling_target)
@@ -381,6 +412,9 @@ def main():
         canary_traffic_percent=canary_traffic_percent,
         namespace=namespace,
         framework=framework,
+        runtime_version=runtime_version,
+        resource_requests=resource_requests,
+        resource_limits=resource_limits,
         custom_model_spec=custom_model_spec,
         autoscaling_target=autoscaling_target,
         service_account=service_account,

From 8e1d6523e50e94e0200e2f914b7f9a77ed61fdbe Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn <139039524+magdalenakuhn17@users.noreply.github.com>
Date: Mon, 23 Oct 2023 21:53:11 +0200
Subject: [PATCH 3/5] Update components/kserve/src/kservedeployer.py

Co-authored-by: Tommy Li
---
 components/kserve/src/kservedeployer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py
index 54e218e8278..ed6d40f24db 100644
--- a/components/kserve/src/kservedeployer.py
+++ b/components/kserve/src/kservedeployer.py
@@ -307,7 +307,7 @@ def main():
         "--resource-requests",
         type=json.loads,
         help="CPU and Memory requests for Model Serving",
-        default="{'cpu': '0.5', 'memory': '512Mi'}",
+        default='{"cpu": "0.5", "memory": "512Mi"}',
     )
     parser.add_argument(
         "--resource-limits",

From 3a5c2737832c78102bb9ea240943e63fe6747716 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn <139039524+magdalenakuhn17@users.noreply.github.com>
Date: Mon, 23 Oct 2023 21:53:16 +0200
Subject: [PATCH 4/5] Update components/kserve/src/kservedeployer.py

Co-authored-by: Tommy Li
---
 components/kserve/src/kservedeployer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py
index ed6d40f24db..536164e13d6 100644
--- a/components/kserve/src/kservedeployer.py
+++ b/components/kserve/src/kservedeployer.py
@@ -313,7 +313,7 @@ def main():
         "--resource-limits",
         type=json.loads,
         help="CPU and Memory limits for Model Serving",
-        default="{'cpu': '1', 'memory': '1Gi'}",
+        default='{"cpu": "1", "memory": "1Gi"}',
     )
     parser.add_argument(
         "--custom-model-spec",

From d4c637745461a22d75a14229f7f15bd0f2ccc521 Mon Sep 17 00:00:00 2001
From: Magdalena Kuhn <139039524+magdalenakuhn17@users.noreply.github.com>
Date: Mon, 23 Oct 2023 21:56:59 +0200
Subject: [PATCH 5/5] Update components/kserve/src/kservedeployer.py

Co-authored-by: Tommy Li
---
 components/kserve/src/kservedeployer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/components/kserve/src/kservedeployer.py b/components/kserve/src/kservedeployer.py
index 536164e13d6..c8799332f76 100644
--- a/components/kserve/src/kservedeployer.py
+++ b/components/kserve/src/kservedeployer.py
@@ -21,7 +21,7 @@
 import yaml

 from kubernetes import client
-from kubernetes import V1ResourceRequirements
+from kubernetes.client.models import V1ResourceRequirements
 from kserve import constants
 from kserve import KServeClient
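
Applied together, these patches let pipeline authors set the serving runtime version and container resources without writing a full custom InferenceService YAML. As a usage illustration only (not part of the patch series), here is a minimal sketch of calling the component from a KFP v1 pipeline once the series is merged. The component URL, pipeline name, model name, model URI, and runtime version tag below are hypothetical placeholders; only the `runtime_version`, `resource_requests`, and `resource_limits` parameters and their JSON shapes come from the patches themselves:

```python
from kfp import components, dsl

# Hypothetical URL: point this at the component.yaml revision that
# actually contains the new inputs.
kserve_op = components.load_component_from_url(
    'https://raw.githubusercontent.com/kubeflow/pipelines/master/components/kserve/component.yaml')


@dsl.pipeline(name='kserve-resource-demo')
def kserve_pipeline():
    # resource_requests/resource_limits are passed as JSON strings; the
    # deployer parses them with json.loads, which is why patches 3 and 4
    # switch the argparse defaults to double-quoted JSON.
    kserve_op(
        action='apply',
        model_name='sklearn-iris',          # hypothetical model name
        model_uri='gs://example-bucket/models/sklearn/model',  # hypothetical URI
        framework='sklearn',
        runtime_version='1.3.0',            # hypothetical version tag
        resource_requests='{"cpu": "0.5", "memory": "512Mi"}',
        resource_limits='{"cpu": "1", "memory": "1Gi"}',
    )
```

On the deployer side these JSON strings become the `requests` and `limits` of a `V1ResourceRequirements` attached to the predictor spec, so the keys must be valid Kubernetes resource names (`cpu`, `memory`) and the values valid resource quantities.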