From e48554249a8db97a679bdcdc96c9607bdc7e01f1 Mon Sep 17 00:00:00 2001 From: Sivanantham <90966311+sivanantha321@users.noreply.github.com> Date: Sun, 26 Nov 2023 20:45:21 +0530 Subject: [PATCH] Document servingruntime constraint introduced by kserve/kserve#3181 (#320) * Document serving runtime constraint introduced by kserve/kserve#3181 Signed-off-by: Sivanantham Chinnaiyan * Set content type for predict/explainer curl requests Signed-off-by: Sivanantham Chinnaiyan * Update docs/modelserving/servingruntimes.md Signed-off-by: Dan Sun --------- Signed-off-by: Sivanantham Chinnaiyan Signed-off-by: Dan Sun Co-authored-by: Dan Sun --- .../serverless/kourier_networking/README.md | 2 +- .../explainer/alibi/income/README.md | 4 ++-- .../explainer/alibi/moviesentiment/README.md | 8 +++---- .../inference_graph/image_pipeline/README.md | 2 +- docs/modelserving/logger/logger.md | 4 ++-- docs/modelserving/servingruntimes.md | 21 +++++++++++++++++++ docs/modelserving/storage/azure/azure.md | 2 +- docs/modelserving/storage/pvc/pvc.md | 2 +- docs/modelserving/storage/s3/s3.md | 2 +- docs/modelserving/storage/uri/uri.md | 4 ++-- docs/modelserving/v1beta1/amd/README.md | 2 +- .../v1beta1/custom/custom_model/README.md | 4 ++-- docs/modelserving/v1beta1/lightgbm/README.md | 2 +- docs/modelserving/v1beta1/pmml/README.md | 2 +- .../v1beta1/rollout/canary-example.md | 4 ++-- docs/modelserving/v1beta1/spark/README.md | 2 +- .../modelserving/v1beta1/tensorflow/README.md | 2 +- .../modelserving/v1beta1/torchserve/README.md | 12 +++++------ .../v1beta1/transformer/feast/README.md | 2 +- .../torchserve_image_transformer/README.md | 4 ++-- .../v1beta1/triton/bert/README.md | 2 +- .../v1beta1/triton/torchscript/README.md | 4 ++-- 22 files changed, 57 insertions(+), 36 deletions(-) diff --git a/docs/admin/serverless/kourier_networking/README.md b/docs/admin/serverless/kourier_networking/README.md index a6fea06f7..ea3a7cd14 100644 --- a/docs/admin/serverless/kourier_networking/README.md +++ b/docs/admin/serverless/kourier_networking/README.md @@ -130,7 +130,7 @@ Send a prediction request to the InferenceService and check the output. MODEL_NAME=pmml-demo INPUT_PATH=@./pmml-input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice pmml-demo -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! 
success "Expected Output" diff --git a/docs/modelserving/explainer/alibi/income/README.md b/docs/modelserving/explainer/alibi/income/README.md index fcde7c209..61ab4c48c 100644 --- a/docs/modelserving/explainer/alibi/income/README.md +++ b/docs/modelserving/explainer/alibi/income/README.md @@ -57,7 +57,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice income -o jsonpath='{.status.url Test the predictor: ```bash -curl -H "Host: $SERVICE_HOSTNAME" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":[[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]]}' +curl -H "Host: $SERVICE_HOSTNAME" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":[[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]]}' ``` You should receive the response showing the prediction is for low salary: @@ -71,7 +71,7 @@ You should receive the response showing the prediction is for low salary: Now lets get an explanation for this: ```bash -curl -H "Host: $SERVICE_HOSTNAME" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":[[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]]}' +curl -H "Host: $SERVICE_HOSTNAME" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":[[39, 7, 1, 1, 1, 1, 4, 1, 2174, 0, 40, 9]]}' ``` The returned explanation will be like: diff --git a/docs/modelserving/explainer/alibi/moviesentiment/README.md b/docs/modelserving/explainer/alibi/moviesentiment/README.md index c9909902b..c4e6f3149 100644 --- a/docs/modelserving/explainer/alibi/moviesentiment/README.md +++ b/docs/modelserving/explainer/alibi/moviesentiment/README.md @@ -56,7 +56,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice moviesentiment -o jsonpath='{.st Test the predictor on an example sentence: ```bash -curl -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' +curl -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' ``` You should receive the response showing negative sentiment: @@ -69,7 +69,7 @@ You should receive the response showing negative sentiment: Test on another sentence: ```bash -curl -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":["a touching , sophisticated film that almost seems like a documentary in the way it captures an italian immigrant family on the brink of major changes ."]}' +curl -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d '{"instances":["a touching , sophisticated film that almost seems like a documentary in the way it captures an italian immigrant family on the brink of major changes ."]}' ``` You should receive the response showing positive sentiment: @@ -83,7 +83,7 @@ Now lets get an explanation for the first sentence: ```bash -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" 
http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' ``` !!! success "Expected Output" @@ -234,7 +234,7 @@ kubectl create -f moviesentiment2.yaml and then ask for an explanation: ```bash -curl -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' +curl -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:explain -d '{"instances":["a visually flashy but narratively opaque and emotionally vapid exercise ."]}' ``` !!! success "Expected Output" diff --git a/docs/modelserving/inference_graph/image_pipeline/README.md b/docs/modelserving/inference_graph/image_pipeline/README.md index 67c6eedd1..0fe13d501 100644 --- a/docs/modelserving/inference_graph/image_pipeline/README.md +++ b/docs/modelserving/inference_graph/image_pipeline/README.md @@ -146,7 +146,7 @@ The first step is to [determine the ingress IP and ports](../../../get_started/f Now, you can test the inference graph by sending the [cat](cat.json) and [dog image data](dog.json). ```bash SERVICE_HOSTNAME=$(kubectl get inferencegraph dog-breed-pipeline -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT} -d @./cat.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT} -d @./cat.json ``` !!! success "Expected Output" ```{ .json .no-copy } diff --git a/docs/modelserving/logger/logger.md b/docs/modelserving/logger/logger.md index e0fcec504..2161bc302 100644 --- a/docs/modelserving/logger/logger.md +++ b/docs/modelserving/logger/logger.md @@ -77,7 +77,7 @@ MODEL_NAME=sklearn-iris INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" @@ -283,7 +283,7 @@ MODEL_NAME=sklearn-iris INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/servingruntimes.md b/docs/modelserving/servingruntimes.md index 5bdd19989..0fcbe2afb 100644 --- a/docs/modelserving/servingruntimes.md +++ b/docs/modelserving/servingruntimes.md @@ -230,6 +230,27 @@ will be used for model deployment. - The serving runtime with priority takes precedence over the serving runtime with priority not specified. - Two model formats with same name and same model version cannot have the same priority. - If more than one serving runtime supports the model format and none of them specified the priority then, there is no guarantee _which_ runtime will be selected. 
+- If a serving runtime supports multiple versions of a modelFormat, then all of those versions should have the same priority.
+  For example, the serving runtime shown below supports two versions of sklearn, so both version entries have the same priority.
+  ```yaml
+  apiVersion: serving.kserve.io/v1alpha1
+  kind: ClusterServingRuntime
+  metadata:
+    name: mlserver
+  spec:
+    protocolVersions:
+      - v2
+    supportedModelFormats:
+      - name: sklearn
+        version: "0"
+        autoSelect: true
+        priority: 2
+      - name: sklearn
+        version: "1"
+        autoSelect: true
+        priority: 2
+    ...
+  ```

 !!! warning
     If multiple runtimes list the same format and/or version as auto-selectable and the priority is not specified, the runtime is selected based on the `creationTimestamp` i.e. the most recently created runtime is selected. So there is no guarantee _which_ runtime will be selected.
diff --git a/docs/modelserving/storage/azure/azure.md b/docs/modelserving/storage/azure/azure.md
index 8c2d69861..6f25393ac 100644
--- a/docs/modelserving/storage/azure/azure.md
+++ b/docs/modelserving/storage/azure/azure.md
@@ -108,7 +108,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-azure -o jsonpath='{.sta
 MODEL_NAME=sklearn-azure
 INPUT_PATH=@./input.json

-curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
+curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
 ```

 !!! success "Expected Output"
diff --git a/docs/modelserving/storage/pvc/pvc.md b/docs/modelserving/storage/pvc/pvc.md
index 36013bf83..f79ec151a 100644
--- a/docs/modelserving/storage/pvc/pvc.md
+++ b/docs/modelserving/storage/pvc/pvc.md
@@ -134,7 +134,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-pvc -o jsonpath='{.statu
 MODEL_NAME=sklearn-pvc
 INPUT_PATH=@./input.json

-curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
+curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
 ```

 !!! success "Expected Output"
diff --git a/docs/modelserving/storage/s3/s3.md b/docs/modelserving/storage/s3/s3.md
index fe454eca2..fe9fcb443 100644
--- a/docs/modelserving/storage/s3/s3.md
+++ b/docs/modelserving/storage/s3/s3.md
@@ -131,7 +131,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice mnist-s3 -o jsonpath='{.status.u
 MODEL_NAME=mnist-s3
 INPUT_PATH=@./input.json

-curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
+curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
 ```

 !!! success "Expected Output"
diff --git a/docs/modelserving/storage/uri/uri.md b/docs/modelserving/storage/uri/uri.md
index ff3f3232e..93d274f27 100644
--- a/docs/modelserving/storage/uri/uri.md
+++ b/docs/modelserving/storage/uri/uri.md
@@ -137,7 +137,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-from-uri -o jsonpath='{.
 MODEL_NAME=sklearn-from-uri
 INPUT_PATH=@./input.json

-curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
+curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH
 ```

 !!!
success "Expected Output" @@ -265,7 +265,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice tensorflow-from-uri-gzip -o json MODEL_NAME=tensorflow-from-uri-gzip INPUT_PATH=@./input.json -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/amd/README.md b/docs/modelserving/v1beta1/amd/README.md index dbcaaf521..c5e05ce5f 100644 --- a/docs/modelserving/v1beta1/amd/README.md +++ b/docs/modelserving/v1beta1/amd/README.md @@ -100,7 +100,7 @@ Assuming that `INGRESS_HOST`, `INGRESS_PORT`, and `SERVICE_HOSTNAME` have been d ```bash export MODEL_NAME=mnist export INPUT_DATA=@./input.json -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d ${INPUT_DATA} +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d ${INPUT_DATA} ``` This shows the response from the server in KServe's v2 API format. diff --git a/docs/modelserving/v1beta1/custom/custom_model/README.md b/docs/modelserving/v1beta1/custom/custom_model/README.md index 62fb26e3d..23a6af1a0 100644 --- a/docs/modelserving/v1beta1/custom/custom_model/README.md +++ b/docs/modelserving/v1beta1/custom/custom_model/README.md @@ -81,7 +81,7 @@ docker run -ePORT=8080 -p8080:8080 ${DOCKER_USER}/custom-model:v1 Send a test inference request locally with [input.json](./input.json) ```bash -curl localhost:8080/v1/models/custom-model:predict -d @./input.json +curl -H "Content-Type: application/json" localhost:8080/v1/models/custom-model:predict -d @./input.json ``` !!! success "Expected Output" ```{ .json .no-copy } @@ -146,7 +146,7 @@ MODEL_NAME=custom-model INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice ${MODEL_NAME} -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/lightgbm/README.md b/docs/modelserving/v1beta1/lightgbm/README.md index 513bbaf4d..d73d34e7c 100644 --- a/docs/modelserving/v1beta1/lightgbm/README.md +++ b/docs/modelserving/v1beta1/lightgbm/README.md @@ -101,7 +101,7 @@ To test the deployed model the first step is to [determine the ingress IP and po MODEL_NAME=lightgbm-iris INPUT_PATH=@./iris-input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice lightgbm-iris -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/pmml/README.md b/docs/modelserving/v1beta1/pmml/README.md index 5a105afab..b83e7698a 100644 --- a/docs/modelserving/v1beta1/pmml/README.md +++ b/docs/modelserving/v1beta1/pmml/README.md @@ -63,7 +63,7 @@ You can see an example payload below. 
Create a file named `iris-input.json` with MODEL_NAME=pmml-demo INPUT_PATH=@./iris-input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice pmml-demo -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/rollout/canary-example.md b/docs/modelserving/v1beta1/rollout/canary-example.md index d6fe1cdd9..0c7ad956d 100644 --- a/docs/modelserving/v1beta1/rollout/canary-example.md +++ b/docs/modelserving/v1beta1/rollout/canary-example.md @@ -249,13 +249,13 @@ MODEL_NAME=sklearn-iris curl the latest revision ```bash -curl -v -H "Host: latest-${MODEL_NAME}-predictor-default.kserve-test.example.com" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d @./iris-input.json +curl -v -H "Host: latest-${MODEL_NAME}-predictor-default.kserve-test.example.com" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d @./iris-input.json ``` or curl the previous revision ```bash -curl -v -H "Host: prev-${MODEL_NAME}-predictor-default.kserve-test.example.com" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d @./iris-input.json +curl -v -H "Host: prev-${MODEL_NAME}-predictor-default.kserve-test.example.com" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d @./iris-input.json ``` diff --git a/docs/modelserving/v1beta1/spark/README.md b/docs/modelserving/v1beta1/spark/README.md index ae30969f2..8e3f4fc38 100644 --- a/docs/modelserving/v1beta1/spark/README.md +++ b/docs/modelserving/v1beta1/spark/README.md @@ -109,7 +109,7 @@ You can see an example payload below. Create a file named `iris-input.json` with MODEL_NAME=spark-pmml INPUT_PATH=@./iris-input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice spark-pmml -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/tensorflow/README.md b/docs/modelserving/v1beta1/tensorflow/README.md index 631b6b290..68a7b45b1 100644 --- a/docs/modelserving/v1beta1/tensorflow/README.md +++ b/docs/modelserving/v1beta1/tensorflow/README.md @@ -60,7 +60,7 @@ file can be downloaded [here](./input.json). MODEL_NAME=flower-sample INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice ${MODEL_NAME} -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict -d $INPUT_PATH ``` !!! 
success "Expected Output" diff --git a/docs/modelserving/v1beta1/torchserve/README.md b/docs/modelserving/v1beta1/torchserve/README.md index 3217931c6..e9fde7458 100644 --- a/docs/modelserving/v1beta1/torchserve/README.md +++ b/docs/modelserving/v1beta1/torchserve/README.md @@ -158,7 +158,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice torchserve -o jsonpath='{.status You can use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v1/imgconv) to convert the images to base64 byte array, for other models please refer to [input request](https://github.com/pytorch/serve/tree/master/kubernetes/kserve/kf_request_json). ```bash -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d @./mnist.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d @./mnist.json ``` !!! success "Expected Output" @@ -194,7 +194,7 @@ curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1 To get model explanation: ```bash -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/mnist:explain -d @./mnist.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/mnist:explain -d @./mnist.json ``` !!! success "Expected Output" @@ -261,7 +261,7 @@ SERVICE_HOSTNAME=$(kubectl get inferenceservice torchserve-mnist-v2 -o jsonpath= You can send both **byte array** and **tensor** with v2 protocol, for byte array use [image converter](https://github.com/kserve/kserve/tree/master/docs/samples/v1beta1/torchserve/v2/bytes_conv) to convert the image to byte array input. Here we use the [mnist_v2_bytes.json](./mnist_v2_bytes.json) file to run an example inference. ```bash -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d @./mnist_v2_bytes.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d @./mnist_v2_bytes.json ``` !!! success "Expected Output" @@ -273,7 +273,7 @@ For tensor input use the [tensor image converter](https://github.com/kserve/kser tensor input and here we use the [mnist_v2.json](./mnist_v2.json) file to run an example inference. ```bash -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d @./mnist_v2.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d @./mnist_v2.json ``` !!! success "Expected Output" @@ -287,7 +287,7 @@ To get the model explanation with v2 explain endpoint: ```bash MODEL_NAME=mnist -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/mnist/explain -d @./mnist_v2.json +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/mnist/explain -d @./mnist_v2.json ``` !!! 
success "Expected Output" @@ -564,7 +564,7 @@ kubectl patch isvc torchserve --type='json' -p '[{"op": "replace", "path": "/spe ``` ```bash -curl -v -H "Host: latest-torchserve-predictor-default.default.example.com" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d @./mnist.json +curl -v -H "Host: latest-torchserve-predictor-default.default.example.com" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d @./mnist.json ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/transformer/feast/README.md b/docs/modelserving/v1beta1/transformer/feast/README.md index 80310c4d0..f4a405b19 100644 --- a/docs/modelserving/v1beta1/transformer/feast/README.md +++ b/docs/modelserving/v1beta1/transformer/feast/README.md @@ -275,7 +275,7 @@ MODEL_NAME=sklearn-driver-transformer INPUT_PATH=@./driver-input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice $SERVICE_NAME -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict ``` !!! success "Expected output" diff --git a/docs/modelserving/v1beta1/transformer/torchserve_image_transformer/README.md b/docs/modelserving/v1beta1/transformer/torchserve_image_transformer/README.md index f199046a6..f20291905 100644 --- a/docs/modelserving/v1beta1/transformer/torchserve_image_transformer/README.md +++ b/docs/modelserving/v1beta1/transformer/torchserve_image_transformer/README.md @@ -182,7 +182,7 @@ MODEL_NAME=mnist INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice $SERVICE_NAME -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict ``` !!! success "Expected Output" @@ -306,7 +306,7 @@ MODEL_NAME=cifar10 INPUT_PATH=@./image.json SERVICE_HOSTNAME=$(kubectl get inferenceservice $SERVICE_NAME -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict ``` !!! success "Expected Output" diff --git a/docs/modelserving/v1beta1/triton/bert/README.md b/docs/modelserving/v1beta1/triton/bert/README.md index 01a990ac5..b6f656fa8 100644 --- a/docs/modelserving/v1beta1/triton/bert/README.md +++ b/docs/modelserving/v1beta1/triton/bert/README.md @@ -170,7 +170,7 @@ MODEL_NAME=bert-v2 INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservices bert-v2 -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" -d $INPUT_PATH http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/$MODEL_NAME:predict ``` !!! 
success "Expected output" diff --git a/docs/modelserving/v1beta1/triton/torchscript/README.md b/docs/modelserving/v1beta1/triton/torchscript/README.md index e1579142a..11df2e971 100644 --- a/docs/modelserving/v1beta1/triton/torchscript/README.md +++ b/docs/modelserving/v1beta1/triton/torchscript/README.md @@ -163,7 +163,7 @@ curl -O https://raw.githubusercontent.com/kserve/kserve/master/docs/samples/v1be MODEL_NAME=cifar10 INPUT_PATH=@./input.json SERVICE_HOSTNAME=$(kubectl get inferenceservice torchscript-cifar10 -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v2/models/${MODEL_NAME}/infer -d $INPUT_PATH ``` !!! success "Expected Output" ```{ .bash .no-copy } @@ -463,7 +463,7 @@ INPUT_PATH=@./image.json SERVICE_HOSTNAME=$(kubectl get inferenceservice $SERVICE_NAME -o jsonpath='{.status.url}' | cut -d "/" -f 3) -curl -v -H "Host: ${SERVICE_HOSTNAME}" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d $INPUT_PATH +curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/${MODEL_NAME}:predict -d $INPUT_PATH ``` !!! success "Expected Output"