diff --git a/samples/k8s-examples.md b/samples/k8s-examples.md index 6f7296198c..aad56ed2c4 100644 --- a/samples/k8s-examples.md +++ b/samples/k8s-examples.md @@ -18,7 +18,7 @@ MESH_IP ```bash - +```json '172.21.255.9' ``` @@ -44,7 +44,6 @@ cat ./models/sklearn-iris-gs.yaml kubectl create -f ./models/sklearn-iris-gs.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris created ``` @@ -52,7 +51,6 @@ kubectl create -f ./models/sklearn-iris-gs.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris condition met ``` @@ -60,7 +58,6 @@ kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} kubectl get model iris -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M . ``` ```json - { "conditions": [ { @@ -83,7 +80,6 @@ seldon model infer iris --inference-host ${MESH_IP}:80 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - { "model_name": "iris_1", "model_version": "1", @@ -113,7 +109,6 @@ seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP}:80 \ '{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' | jq -M . ``` ```json - { "modelName": "iris_1", "modelVersion": "1", @@ -138,7 +133,6 @@ seldon model infer iris --inference-mode grpc --inference-host ${MESH_IP}:80 \ kubectl get server mlserver -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M . ``` ```json - { "conditions": [ { @@ -161,7 +155,6 @@ kubectl get server mlserver -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M . kubectl delete -f ./models/sklearn-iris-gs.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io "iris" deleted ``` ### Experiment @@ -200,7 +193,6 @@ kubectl create -f ./models/sklearn1.yaml -n ${NAMESPACE} kubectl create -f ./models/sklearn2.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris created model.mlops.seldon.io/iris2 created ``` @@ -209,7 +201,6 @@ kubectl create -f ./models/sklearn2.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris condition met model.mlops.seldon.io/iris2 condition met ``` @@ -235,7 +226,6 @@ cat ./experiments/ab-default-model.yaml kubectl create -f ./experiments/ab-default-model.yaml -n ${NAMESPACE} ``` ```json - experiment.mlops.seldon.io/experiment-sample created ``` @@ -243,7 +233,6 @@ kubectl create -f ./experiments/ab-default-model.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s experiment --all -n ${NAMESPACE} ``` ```json - experiment.mlops.seldon.io/experiment-sample condition met ``` @@ -252,7 +241,6 @@ seldon model infer --inference-host ${MESH_IP}:80 -i 50 iris \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris2_1::25 :iris_1::25] ``` @@ -262,7 +250,6 @@ kubectl delete -f ./models/sklearn1.yaml -n ${NAMESPACE} kubectl delete -f ./models/sklearn2.yaml -n ${NAMESPACE} ``` ```json - experiment.mlops.seldon.io "experiment-sample" deleted model.mlops.seldon.io "iris" deleted model.mlops.seldon.io "iris2" deleted @@ -300,7 +287,6 @@ kubectl create -f ./models/tfsimple1.yaml -n ${NAMESPACE} kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/tfsimple1 created model.mlops.seldon.io/tfsimple2 created ``` @@ -309,7 +295,6 @@ kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/tfsimple1 condition met model.mlops.seldon.io/tfsimple2 condition met ``` @@ -340,7 +325,6 @@ cat ./pipelines/tfsimples.yaml kubectl create -f ./pipelines/tfsimples.yaml -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io/tfsimples created ``` @@ -348,7 +332,6 @@ kubectl create -f ./pipelines/tfsimples.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s pipeline --all -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io/tfsimples condition met ``` @@ -357,7 +340,6 @@ seldon pipeline infer tfsimples --inference-mode grpc --inference-host ${MESH_IP '{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -428,7 +410,6 @@ seldon pipeline infer tfsimples --inference-mode grpc --inference-host ${MESH_IP kubectl delete -f ./pipelines/tfsimples.yaml -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io "tfsimples" deleted ``` @@ -437,7 +418,6 @@ kubectl delete -f ./models/tfsimple1.yaml -n ${NAMESPACE} kubectl delete -f ./models/tfsimple2.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io "tfsimple1" deleted model.mlops.seldon.io "tfsimple2" deleted ``` @@ -485,7 +465,6 @@ kubectl create -f ./models/tfsimple2.yaml -n ${NAMESPACE} kubectl create -f ./models/tfsimple3.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/tfsimple1 created model.mlops.seldon.io/tfsimple2 created model.mlops.seldon.io/tfsimple3 created @@ -495,7 +474,6 @@ kubectl create -f ./models/tfsimple3.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/tfsimple1 condition met model.mlops.seldon.io/tfsimple2 condition met model.mlops.seldon.io/tfsimple3 condition met @@ -529,7 +507,6 @@ cat ./pipelines/tfsimples-join.yaml kubectl create -f ./pipelines/tfsimples-join.yaml -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io/join created ``` @@ -537,7 +514,6 @@ kubectl create -f ./pipelines/tfsimples-join.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s pipeline --all -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io/join condition met ``` @@ -546,7 +522,6 @@ seldon pipeline infer join --inference-mode grpc --inference-host ${MESH_IP}:80 '{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -617,7 +592,6 @@ seldon pipeline infer join --inference-mode grpc --inference-host ${MESH_IP}:80 kubectl delete -f ./pipelines/tfsimples-join.yaml -n ${NAMESPACE} ``` ```json - pipeline.mlops.seldon.io "join" deleted ``` @@ -627,7 +601,6 @@ kubectl delete -f ./models/tfsimple2.yaml -n ${NAMESPACE} kubectl delete -f ./models/tfsimple3.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io "tfsimple1" deleted model.mlops.seldon.io "tfsimple2" deleted model.mlops.seldon.io "tfsimple3" deleted @@ -653,7 +626,6 @@ cat ./models/income.yaml kubectl create -f ./models/income.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/income created ``` @@ -661,7 +633,6 @@ kubectl create -f ./models/income.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/income condition met ``` @@ -669,7 +640,6 @@ kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} kubectl get model income -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M . ``` ```json - { "conditions": [ { @@ -692,7 +662,6 @@ seldon model infer income --inference-host ${MESH_IP}:80 \ '{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}' ``` ```json - { "model_name": "income_1", "model_version": "1", @@ -736,7 +705,6 @@ cat ./models/income-explainer.yaml kubectl create -f ./models/income-explainer.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/income-explainer created ``` @@ -744,7 +712,6 @@ kubectl create -f ./models/income-explainer.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/income condition met model.mlops.seldon.io/income-explainer condition met ``` @@ -753,7 +720,6 @@ kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} kubectl get model income-explainer -n ${NAMESPACE} -o jsonpath='{.status}' | jq -M . ``` ```json - { "conditions": [ { @@ -776,7 +742,6 @@ seldon model infer income-explainer --inference-host ${MESH_IP}:80 \ '{"inputs": [{"name": "predict", "shape": [1, 12], "datatype": "FP32", "data": [[47,4,1,1,1,3,4,1,0,0,40,9]]}]}' ``` ```json - { "model_name": "income-explainer_1", "model_version": "1", @@ -809,7 +774,6 @@ kubectl delete -f ./models/income.yaml -n ${NAMESPACE} kubectl delete -f ./models/income-explainer.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io "income" deleted model.mlops.seldon.io "income-explainer" deleted ``` @@ -836,7 +800,6 @@ cat ./servers/custom-mlserver.yaml kubectl create -f ./servers/custom-mlserver.yaml -n ${NAMESPACE} ``` ```json - server.mlops.seldon.io/mlserver-custom created ``` @@ -844,7 +807,6 @@ kubectl create -f ./servers/custom-mlserver.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s server --all -n ${NAMESPACE} ``` ```json - server.mlops.seldon.io/mlserver condition met server.mlops.seldon.io/mlserver-custom condition met server.mlops.seldon.io/triton condition met @@ -867,7 +829,6 @@ cat ./models/iris-custom-server.yaml kubectl create -f ./models/iris-custom-server.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris created ``` @@ -875,7 +836,6 @@ kubectl create -f ./models/iris-custom-server.yaml -n ${NAMESPACE} kubectl wait --for condition=ready --timeout=300s model --all -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io/iris condition met ``` @@ -884,7 +844,6 @@ seldon model infer iris --inference-host ${MESH_IP}:80 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - { "model_name": "iris_1", "model_version": "1", @@ -913,7 +872,6 @@ seldon model infer iris --inference-host ${MESH_IP}:80 \ kubectl delete -f ./models/iris-custom-server.yaml -n ${NAMESPACE} ``` ```json - model.mlops.seldon.io "iris" deleted ``` @@ -921,7 +879,6 @@ kubectl delete -f ./models/iris-custom-server.yaml -n ${NAMESPACE} kubectl delete -f ./servers/custom-mlserver.yaml -n ${NAMESPACE} ``` ```json - server.mlops.seldon.io "mlserver-custom" deleted ``` diff --git a/samples/local-experiments.ipynb b/samples/local-experiments.ipynb index 42a4f8b5b6..2e258bb129 100644 --- a/samples/local-experiments.ipynb +++ b/samples/local-experiments.ipynb @@ -1421,12 +1421,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "seldon_model_infer_total{code=\"200\",method_type=\"rest\",model=\"iris\",model_internal=\"iris2_1\",server=\"mlserver\",server_replica=\"0\"} 50\r\n" + "seldon_model_infer_api_seconds_count{code=\"200\",method_type=\"rest\",model=\"iris\",model_internal=\"iris2_1\",server=\"mlserver\",server_replica=\"0\"} 50\r\n" ] } ], "source": [ - "!curl -s 0.0.0:9006/metrics | grep seldon_model_infer_total | grep iris2_1" + "!curl -s 0.0.0:9006/metrics | grep seldon_model_infer_api_seconds_count | grep iris2_1" ] }, { @@ -1828,12 +1828,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "seldon_model_infer_total{code=\"OK\",method_type=\"grpc\",model=\"mul10\",model_internal=\"mul10_1\",server=\"triton\",server_replica=\"0\"} 52\r\n" + "seldon_model_infer_api_seconds_count{code=\"OK\",method_type=\"grpc\",model=\"mul10\",model_internal=\"mul10_1\",server=\"triton\",server_replica=\"0\"} 52\r\n" ] } ], "source": [ - "!curl -s 0.0.0:9007/metrics | grep seldon_model_infer_total | grep mul10_1" + "!curl -s 0.0.0:9007/metrics | grep seldon_model_infer_api_seconds_count | grep mul10_1" ] }, { diff --git a/samples/local-experiments.md b/samples/local-experiments.md index dffe318af2..2198fcc752 100644 --- a/samples/local-experiments.md +++ b/samples/local-experiments.md @@ -41,7 +41,6 @@ seldon model load -f ./models/sklearn1.yaml seldon model load -f ./models/sklearn2.yaml ``` ```json - {} {} ``` @@ -53,7 +52,6 @@ seldon model status iris -w ModelAvailable seldon model status iris2 -w ModelAvailable ``` ```json - {} {} ``` @@ -63,7 +61,6 @@ seldon model infer iris -i 50 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris_1::50] ``` @@ -72,7 +69,6 @@ seldon model infer iris2 -i 50 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris2_1::50] ``` Create an experiment that modifies the iris model to add a second model splitting traffic 50/50 between the two. @@ -101,7 +97,6 @@ Start the experiment. seldon experiment start -f ./experiments/ab-default-model.yaml ``` ```json - {} ``` Wait for the experiment to be ready. @@ -111,7 +106,6 @@ Wait for the experiment to be ready. seldon experiment status experiment-sample -w | jq -M . ``` ```json - { "experimentName": "experiment-sample", "active": true, @@ -129,7 +123,6 @@ seldon model infer iris -i 50 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris2_1::19 :iris_1::31] ``` Show sticky session header `x-seldon-route` that is returned @@ -140,7 +133,6 @@ seldon model infer iris --show-headers \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - Request header Content-Type:[application/json] Request header Seldon-Model:[iris] Response header Ce-Requestid:[0757e893-64c9-411f-8937-f0f4774852ef] @@ -190,7 +182,6 @@ seldon model infer iris -s -i 50 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris_1::50] ``` @@ -199,7 +190,6 @@ seldon model infer iris --inference-mode grpc -s -i 50\ '{"model_name":"iris","inputs":[{"name":"input","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[1,4]}]}' ``` ```json - map[:iris_1::50] ``` Stop the experiment @@ -209,7 +199,6 @@ Stop the experiment seldon experiment stop experiment-sample ``` ```json - {} ``` Unload both models. @@ -220,7 +209,6 @@ seldon model unload iris seldon model unload iris2 ``` ```json - {} {} ``` @@ -262,7 +250,6 @@ seldon model load -f ./models/add10.yaml seldon model load -f ./models/mul10.yaml ``` ```json - {} {} ``` @@ -272,7 +259,6 @@ seldon model status add10 -w ModelAvailable seldon model status mul10 -w ModelAvailable ``` ```json - {} {} ``` @@ -314,7 +300,6 @@ seldon pipeline load -f ./pipelines/add10.yaml seldon pipeline load -f ./pipelines/mul10.yaml ``` ```json - {} {} ``` @@ -324,7 +309,6 @@ seldon pipeline status pipeline-add10 -w PipelineReady seldon pipeline status pipeline-mul10 -w PipelineReady ``` ```json - {"pipelineName":"pipeline-add10","versions":[{"pipeline":{"name":"pipeline-add10","uid":"cc6bmcs5em8of75v7pi0","version":1,"steps":[{"name":"add10"}],"output":{"steps":["add10.outputs"]},"kubernetesMeta":{}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2022-08-29T13:12:19.395809013Z"}}]} {"pipelineName":"pipeline-mul10","versions":[{"pipeline":{"name":"pipeline-mul10","uid":"cc6bmcs5em8of75v7pig","version":1,"steps":[{"name":"mul10"}],"output":{"steps":["mul10.outputs"]},"kubernetesMeta":{}},"state":{"pipelineVersion":1,"status":"PipelineReady","reason":"created pipeline","lastChangeTimestamp":"2022-08-29T13:12:19.632179449Z"}}]} ``` @@ -334,7 +318,6 @@ seldon pipeline infer pipeline-add10 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -364,7 +347,6 @@ seldon pipeline infer pipeline-mul10 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -411,7 +393,6 @@ cat ./experiments/addmul10.yaml seldon experiment start -f ./experiments/addmul10.yaml ``` ```json - {} ``` @@ -419,7 +400,6 @@ seldon experiment start -f ./experiments/addmul10.yaml seldon experiment status addmul10 -w | jq -M . ``` ```json - { "experimentName": "addmul10", "active": true, @@ -435,7 +415,6 @@ seldon pipeline infer pipeline-add10 -i 50 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - map[:add10_1::25 :mul10_1::25 :pipeline-add10.pipeline::25 :pipeline-mul10.pipeline::25] ``` Use sticky session key passed by last infer request to ensure same route is taken each time. @@ -446,7 +425,6 @@ seldon pipeline infer pipeline-add10 --show-headers --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - Request metadata seldon-model:[pipeline-add10.pipeline] {"outputs":[{"name":"OUTPUT","datatype":"FP32","shape":["4"],"contents":{"fp32Contents":[11,12,13,14]}}],"rawOutputContents":["AAAwQQAAQEEAAFBBAABgQQ=="]} Response header x-seldon-route:[:add10_1: :pipeline-add10.pipeline:] @@ -464,7 +442,6 @@ seldon pipeline infer pipeline-add10 -s --show-headers --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - Request metadata x-seldon-route:[:add10_1: :pipeline-add10.pipeline:] Request metadata seldon-model:[pipeline-add10.pipeline] {"outputs":[{"name":"OUTPUT","datatype":"FP32","shape":["4"],"contents":{"fp32Contents":[11,12,13,14]}}],"rawOutputContents":["AAAwQQAAQEEAAFBBAABgQQ=="]} @@ -483,7 +460,6 @@ seldon pipeline infer pipeline-add10 -s -i 50 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - map[:add10_1::50 :pipeline-add10.pipeline::150] ``` @@ -506,7 +482,6 @@ cat ./models/add20.yaml seldon model load -f ./models/add20.yaml ``` ```json - {} ``` @@ -514,7 +489,6 @@ seldon model load -f ./models/add20.yaml seldon model status add20 -w ModelAvailable ``` ```json - {} ``` @@ -539,7 +513,6 @@ cat ./experiments/add1020.yaml seldon experiment start -f ./experiments/add1020.yaml ``` ```json - {} ``` @@ -547,7 +520,6 @@ seldon experiment start -f ./experiments/add1020.yaml seldon experiment status add1020 -w | jq -M . ``` ```json - { "experimentName": "add1020", "active": true, @@ -563,7 +535,6 @@ seldon model infer add10 -i 50 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - map[:add10_1::20 :add20_1::30] ``` @@ -572,7 +543,6 @@ seldon pipeline infer pipeline-add10 -i 100 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - map[:add10_1::27 :add20_1::31 :mul10_1::42 :pipeline-add10.pipeline::58 :pipeline-mul10.pipeline::42] ``` @@ -581,7 +551,6 @@ seldon pipeline infer pipeline-add10 --show-headers --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - Request metadata seldon-model:[pipeline-add10.pipeline] {"outputs":[{"name":"OUTPUT","datatype":"FP32","shape":["4"],"contents":{"fp32Contents":[10,20,30,40]}}],"rawOutputContents":["AAAgQQAAoEEAAPBBAAAgQg=="]} Response header x-forwarded-proto:[http] @@ -599,7 +568,6 @@ seldon pipeline infer pipeline-add10 -s --show-headers --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - Request metadata x-seldon-route:[:mul10_1: :pipeline-mul10.pipeline:] Request metadata seldon-model:[pipeline-add10.pipeline] {"outputs":[{"name":"OUTPUT","datatype":"FP32","shape":["4"],"contents":{"fp32Contents":[10,20,30,40]}}],"rawOutputContents":["AAAgQQAAoEEAAPBBAAAgQg=="]} @@ -623,7 +591,6 @@ seldon model unload add20 seldon model unload mul10 ``` ```json - {} {} {} @@ -672,7 +639,6 @@ seldon model load -f ./models/sklearn1.yaml seldon model load -f ./models/sklearn2.yaml ``` ```json - {} {} ``` @@ -684,7 +650,6 @@ seldon model status iris -w ModelAvailable seldon model status iris2 -w ModelAvailable ``` ```json - {} {} ``` @@ -716,7 +681,6 @@ Start the experiment. seldon experiment start -f ./experiments/sklearn-mirror.yaml ``` ```json - {} ``` Wait for the experiment to be ready. @@ -726,7 +690,6 @@ Wait for the experiment to be ready. seldon experiment status sklearn-mirror -w | jq -M . ``` ```json - { "experimentName": "sklearn-mirror", "active": true, @@ -744,17 +707,16 @@ seldon model infer iris -i 50 \ '{"inputs": [{"name": "predict", "shape": [1, 4], "datatype": "FP32", "data": [[1, 2, 3, 4]]}]}' ``` ```json - map[:iris_1::50] ``` We can check the local prometheus port from the agent to validate requests went to iris2 ```bash -curl -s 0.0.0:9006/metrics | grep seldon_model_infer_total | grep iris2_1 +curl -s 0.0.0:9006/metrics | grep seldon_model_infer_api_seconds_count | grep iris2_1 ``` - - seldon_model_infer_total{code="200",method_type="rest",model="iris",model_internal="iris2_1",server="mlserver",server_replica="0"} 50 +```json + seldon_model_infer_api_seconds_count{code="200",method_type="rest",model="iris",model_internal="iris2_1",server="mlserver",server_replica="0"} 50 ``` Stop the experiment @@ -763,7 +725,6 @@ Stop the experiment seldon experiment stop sklearn-mirror ``` ```json - {} ``` Unload both models. @@ -774,7 +735,6 @@ seldon model unload iris seldon model unload iris2 ``` ```json - {} {} ``` @@ -816,7 +776,6 @@ seldon model load -f ./models/add10.yaml seldon model load -f ./models/mul10.yaml ``` ```json - {} {} ``` @@ -826,7 +785,6 @@ seldon model status add10 -w ModelAvailable seldon model status mul10 -w ModelAvailable ``` ```json - {} {} ``` @@ -868,7 +826,6 @@ seldon pipeline load -f ./pipelines/add10.yaml seldon pipeline load -f ./pipelines/mul10.yaml ``` ```json - {} {} ``` @@ -878,7 +835,6 @@ seldon pipeline status pipeline-add10 -w PipelineReady seldon pipeline status pipeline-mul10 -w PipelineReady ``` ```json - {"pipelineName":"pipeline-add10", "versions":[{"pipeline":{"name":"pipeline-add10", "uid":"cc0a78ui50579svh4i5g", "version":1, "steps":[{"name":"add10"}], "output":{"steps":["add10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"Created pipeline", "lastChangeTimestamp":"2022-08-20T09:10:25.432802482Z"}}]} {"pipelineName":"pipeline-mul10", "versions":[{"pipeline":{"name":"pipeline-mul10", "uid":"cc0a78ui50579svh4i60", "version":1, "steps":[{"name":"mul10"}], "output":{"steps":["mul10.outputs"]}, "kubernetesMeta":{}}, "state":{"pipelineVersion":1, "status":"PipelineReady", "reason":"Created pipeline", "lastChangeTimestamp":"2022-08-20T09:10:26.057188908Z"}}]} ``` @@ -888,7 +844,6 @@ seldon pipeline infer pipeline-add10 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - {"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[11, 12, 13, 14]}}], "rawOutputContents":["AAAwQQAAQEEAAFBBAABgQQ=="]} ``` @@ -897,7 +852,6 @@ seldon pipeline infer pipeline-mul10 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - {"outputs":[{"name":"OUTPUT", "datatype":"FP32", "shape":["4"], "contents":{"fp32Contents":[10, 20, 30, 40]}}], "rawOutputContents":["AAAgQQAAoEEAAPBBAAAgQg=="]} ``` @@ -925,7 +879,6 @@ cat ./experiments/addmul10-mirror.yaml seldon experiment start -f ./experiments/addmul10-mirror.yaml ``` ```json - {} ``` @@ -933,7 +886,6 @@ seldon experiment start -f ./experiments/addmul10-mirror.yaml seldon experiment status addmul10-mirror -w | jq -M . ``` ```json - { "experimentName": "addmul10-mirror", "active": true, @@ -949,17 +901,16 @@ seldon pipeline infer pipeline-add10 -i 50 --inference-mode grpc \ '{"model_name":"add10","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' ``` ```json - map[:add10_1::50 :pipeline-add10.pipeline::50] ``` Let's check that the mul10 model was called. ```bash -curl -s 0.0.0:9007/metrics | grep seldon_model_infer_total | grep mul10_1 +curl -s 0.0.0:9007/metrics | grep seldon_model_infer_api_seconds_count | grep mul10_1 ``` - - seldon_model_infer_total{code="OK",method_type="grpc",model="mul10",model_internal="mul10_1",server="triton",server_replica="0"} 52 +```json + seldon_model_infer_api_seconds_count{code="OK",method_type="grpc",model="mul10",model_internal="mul10_1",server="triton",server_replica="0"} 52 ``` ```bash @@ -970,7 +921,6 @@ seldon model unload add10 seldon model unload mul10 ``` ```json - {} {} {} diff --git a/samples/pipeline-examples.md b/samples/pipeline-examples.md index 116874b483..f4c4734acc 100644 --- a/samples/pipeline-examples.md +++ b/samples/pipeline-examples.md @@ -42,7 +42,6 @@ seldon model load -f ./models/tfsimple1.yaml seldon model load -f ./models/tfsimple2.yaml ``` ```json - {} {} ``` @@ -52,7 +51,6 @@ seldon model status tfsimple1 -w ModelAvailable | jq -M . seldon model status tfsimple2 -w ModelAvailable | jq -M . ``` ```json - {} {} ``` @@ -87,7 +85,6 @@ cat ./pipelines/tfsimples.yaml seldon pipeline load -f ./pipelines/tfsimples.yaml ``` ```json - {} ``` @@ -95,7 +92,6 @@ seldon pipeline load -f ./pipelines/tfsimples.yaml seldon pipeline status tfsimples -w PipelineReady| jq -M . ``` ```json - { "pipelineName": "tfsimples", "versions": [ @@ -142,7 +138,6 @@ seldon pipeline infer tfsimples \ '{"inputs":[{"name":"INPUT0","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","data":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16],"datatype":"INT32","shape":[1,16]}]}' | jq -M . ``` ```json - { "model_name": "", "outputs": [ @@ -207,7 +202,6 @@ seldon pipeline infer tfsimples --inference-mode grpc \ '{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -276,7 +270,6 @@ We use the Seldon CLI `pipeline inspect` feature to look at the data for all ste seldon pipeline inspect tfsimples ``` ```json - seldon.default.model.tfsimple1.inputs cd3fu5jf39s357kil0gg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]}}]} seldon.default.model.tfsimple1.outputs cd3fu5jf39s357kil0gg {"modelName":"tfsimple1_1","modelVersion":"1","outputs":[{"name":"OUTPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"OUTPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}]} seldon.default.model.tfsimple2.inputs cd3fu5jf39s357kil0gg {"inputs":[{"name":"INPUT0","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32]}},{"name":"INPUT1","datatype":"INT32","shape":["1","16"],"contents":{"intContents":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]}}],"rawInputContents":["AgAAAAQAAAAGAAAACAAAAAoAAAAMAAAADgAAABAAAAASAAAAFAAAABYAAAAYAAAAGgAAABwAAAAeAAAAIAAAAA==","AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA=="]} @@ -291,7 +284,6 @@ Next, we look get the output as json and use the `jq` tool to get just one value seldon pipeline inspect tfsimples --format json | jq -M .topics[0].msgs[0].value ``` ```json - { "inputs": [ { @@ -358,7 +350,6 @@ seldon pipeline inspect tfsimples --format json | jq -M .topics[0].msgs[0].value seldon pipeline unload tfsimples ``` ```json - {} ``` @@ -367,7 +358,6 @@ seldon model unload tfsimple1 seldon model unload tfsimple2 ``` ```json - {} {} ``` @@ -421,7 +411,6 @@ seldon model load -f ./models/tfsimple2.yaml seldon model load -f ./models/tfsimple3.yaml ``` ```json - {} {} {} @@ -433,7 +422,6 @@ seldon model status tfsimple2 -w ModelAvailable | jq -M . seldon model status tfsimple3 -w ModelAvailable | jq -M . ``` ```json - {} {} {} @@ -469,7 +457,6 @@ cat ./pipelines/tfsimples-join.yaml seldon pipeline load -f ./pipelines/tfsimples-join.yaml ``` ```json - {} ``` @@ -477,7 +464,6 @@ seldon pipeline load -f ./pipelines/tfsimples-join.yaml seldon pipeline status join -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "join", "versions": [ @@ -530,7 +516,6 @@ seldon pipeline infer join --inference-mode grpc \ '{"model_name":"simple","inputs":[{"name":"INPUT0","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]},{"name":"INPUT1","contents":{"int_contents":[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]},"datatype":"INT32","shape":[1,16]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -597,7 +582,6 @@ seldon pipeline infer join --inference-mode grpc \ seldon pipeline unload join ``` ```json - {} ``` @@ -607,7 +591,6 @@ seldon model unload tfsimple2 seldon model unload tfsimple3 ``` ```json - {} {} {} @@ -662,7 +645,6 @@ seldon model load -f ./models/add10.yaml seldon model load -f ./models/mul10.yaml ``` ```json - {} {} {} @@ -674,7 +656,6 @@ seldon model status add10 -w ModelAvailable | jq -M . seldon model status mul10 -w ModelAvailable | jq -M . ``` ```json - {} {} {} @@ -714,7 +695,6 @@ cat ./pipelines/conditional.yaml seldon pipeline load -f ./pipelines/conditional.yaml ``` ```json - {} ``` @@ -722,7 +702,6 @@ seldon pipeline load -f ./pipelines/conditional.yaml seldon pipeline status tfsimple-conditional -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "tfsimple-conditional", "versions": [ @@ -781,7 +760,6 @@ seldon pipeline infer tfsimple-conditional --inference-mode grpc \ '{"model_name":"conditional","inputs":[{"name":"CHOICE","contents":{"int_contents":[0]},"datatype":"INT32","shape":[1]},{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -810,7 +788,6 @@ seldon pipeline infer tfsimple-conditional --inference-mode grpc \ '{"model_name":"conditional","inputs":[{"name":"CHOICE","contents":{"int_contents":[1]},"datatype":"INT32","shape":[1]},{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -836,7 +813,6 @@ seldon pipeline infer tfsimple-conditional --inference-mode grpc \ seldon pipeline unload tfsimple-conditional ``` ```json - {} ``` @@ -846,7 +822,6 @@ seldon model unload add10 seldon model unload mul10 ``` ```json - {} {} {} @@ -887,7 +862,6 @@ seldon model load -f ./models/mul10.yaml seldon model load -f ./models/add10.yaml ``` ```json - {} {} ``` @@ -897,7 +871,6 @@ seldon model status mul10 -w ModelAvailable | jq -M . seldon model status add10 -w ModelAvailable | jq -M . ``` ```json - {} {} ``` @@ -935,7 +908,6 @@ cat ./pipelines/pipeline-inputs.yaml seldon pipeline load -f ./pipelines/pipeline-inputs.yaml ``` ```json - {} ``` @@ -943,7 +915,6 @@ seldon pipeline load -f ./pipelines/pipeline-inputs.yaml seldon pipeline status pipeline-inputs -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "pipeline-inputs", "versions": [ @@ -996,7 +967,6 @@ seldon pipeline infer pipeline-inputs --inference-mode grpc \ '{"model_name":"pipeline","inputs":[{"name":"INPUT0","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]},{"name":"INPUT1","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -1037,7 +1007,6 @@ seldon pipeline infer pipeline-inputs --inference-mode grpc \ seldon pipeline unload pipeline-inputs ``` ```json - {} ``` @@ -1046,7 +1015,6 @@ seldon model unload mul10 seldon model unload add10 ``` ```json - {} {} ``` @@ -1085,7 +1053,6 @@ seldon model load -f ./models/mul10.yaml seldon model load -f ./models/add10.yaml ``` ```json - {} {} ``` @@ -1095,7 +1062,6 @@ seldon model status mul10 -w ModelAvailable | jq -M . seldon model status add10 -w ModelAvailable | jq -M . ``` ```json - {} {} ``` @@ -1135,7 +1101,6 @@ cat ./pipelines/trigger-joins.yaml seldon pipeline load -f ./pipelines/trigger-joins.yaml ``` ```json - {} ``` @@ -1143,7 +1108,6 @@ seldon pipeline load -f ./pipelines/trigger-joins.yaml seldon pipeline status trigger-joins -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "trigger-joins", "versions": [ @@ -1199,7 +1163,6 @@ seldon pipeline infer trigger-joins --inference-mode grpc \ '{"model_name":"pipeline","inputs":[{"name":"ok1","contents":{"fp32_contents":[1]},"datatype":"FP32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -1226,7 +1189,6 @@ seldon pipeline infer trigger-joins --inference-mode grpc \ '{"model_name":"pipeline","inputs":[{"name":"ok3","contents":{"fp32_contents":[1]},"datatype":"FP32","shape":[1]},{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -1252,7 +1214,6 @@ seldon pipeline infer trigger-joins --inference-mode grpc \ seldon pipeline unload trigger-joins ``` ```json - {} ``` @@ -1261,7 +1222,6 @@ seldon model unload mul10 seldon model unload add10 ``` ```json - {} {} ``` diff --git a/samples/pipeline-versions.md b/samples/pipeline-versions.md index 227b45e443..fba7340661 100644 --- a/samples/pipeline-versions.md +++ b/samples/pipeline-versions.md @@ -5,7 +5,7 @@ ```bash which seldon ``` - +```json /home/clive/work/scv2/seldon-core-v2/operator/bin/seldon ``` ### Model Join @@ -18,7 +18,6 @@ seldon model load -f ./models/add10.yaml seldon model load -f ./models/mul10.yaml ``` ```json - {} {} ``` @@ -28,7 +27,6 @@ seldon model status add10 -w ModelAvailable | jq -M . seldon model status mul10 -w ModelAvailable | jq -M . ``` ```json - {} {} ``` @@ -53,7 +51,6 @@ cat ./pipelines/version-test-a.yaml seldon pipeline load -f ./pipelines/version-test-a.yaml ``` ```json - {} ``` @@ -61,7 +58,6 @@ seldon pipeline load -f ./pipelines/version-test-a.yaml seldon pipeline status version-test -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "version-test", "versions": [ @@ -98,7 +94,6 @@ seldon pipeline infer version-test --inference-mode grpc \ '{"model_name":"outlier","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -143,7 +138,6 @@ cat ./pipelines/version-test-b.yaml seldon pipeline load -f ./pipelines/version-test-b.yaml ``` ```json - {} ``` @@ -151,7 +145,6 @@ seldon pipeline load -f ./pipelines/version-test-b.yaml seldon pipeline status version-test -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "version-test", "versions": [ @@ -188,7 +181,6 @@ seldon pipeline infer version-test --inference-mode grpc \ '{"model_name":"outlier","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -217,7 +209,6 @@ seldon pipeline infer version-test --inference-mode grpc \ seldon pipeline load -f ./pipelines/version-test-a.yaml ``` ```json - {} ``` @@ -225,7 +216,6 @@ seldon pipeline load -f ./pipelines/version-test-a.yaml seldon pipeline status version-test -w PipelineReady | jq -M . ``` ```json - { "pipelineName": "version-test", "versions": [ @@ -262,7 +252,6 @@ seldon pipeline infer version-test --inference-mode grpc \ '{"model_name":"outlier","inputs":[{"name":"INPUT","contents":{"fp32_contents":[1,2,3,4]},"datatype":"FP32","shape":[4]}]}' | jq -M . ``` ```json - { "outputs": [ { @@ -291,7 +280,6 @@ seldon pipeline infer version-test --inference-mode grpc \ seldon pipeline unload version-test ``` ```json - {} ``` @@ -300,7 +288,6 @@ seldon model unload add10 seldon model unload mul10 ``` ```json - {} {} ``` diff --git a/samples/trigger_joins.md b/samples/trigger_joins.md index fcb05eb28b..ff84e9ea24 100644 --- a/samples/trigger_joins.md +++ b/samples/trigger_joins.md @@ -31,7 +31,6 @@ seldon model load -f ./models/id2_node.yaml seldon model load -f ./models/join_node.yaml ``` ```json - {} {} {} @@ -43,7 +42,6 @@ seldon model status id1_node -w ModelAvailable seldon model status id2_node -w ModelAvailable ``` ```json - {} {} {} @@ -53,7 +51,6 @@ seldon model status id2_node -w ModelAvailable seldon pipeline load -f ./pipelines/triggers_join_inputs.yaml ``` ```json - {} ``` @@ -61,7 +58,6 @@ seldon pipeline load -f ./pipelines/triggers_join_inputs.yaml seldon pipeline status triggers_join_inputs -w PipelineReady | jq . ``` ```json - { "pipelineName": "triggers_join_inputs", "versions": [ @@ -130,7 +126,7 @@ request_string = get_request_string(use_trigger_1=True, use_trigger_2=True) seldon pipeline infer triggers_join_inputs --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -139,7 +135,7 @@ request_string = get_request_string(use_trigger_1=True, use_trigger_2=False) seldon pipeline infer triggers_join_inputs --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -148,7 +144,7 @@ request_string = get_request_string(use_trigger_1=False, use_trigger_2=True) seldon pipeline infer triggers_join_inputs --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -156,7 +152,6 @@ seldon pipeline infer triggers_join_inputs --inference-mode grpc '{request_strin seldon pipeline unload triggers_join_inputs ``` ```json - {} ``` @@ -164,7 +159,6 @@ seldon pipeline unload triggers_join_inputs seldon pipeline load -f ./pipelines/triggers_join_internal.yaml ``` ```json - {} ``` @@ -172,7 +166,6 @@ seldon pipeline load -f ./pipelines/triggers_join_internal.yaml seldon pipeline status triggers_join_internal -w PipelineReady | jq . ``` ```json - { "pipelineName": "triggers_join_internal", "versions": [ @@ -269,7 +262,7 @@ request_string = get_request_string(use_trigger_1=True, use_trigger_2=True) seldon pipeline infer triggers_join_internal --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -278,7 +271,7 @@ request_string = get_request_string(use_trigger_1=True, use_trigger_2=False) seldon pipeline infer triggers_join_internal --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -287,7 +280,7 @@ request_string = get_request_string(use_trigger_1=False, use_trigger_2=True) seldon pipeline infer triggers_join_internal --inference-mode grpc '{request_string}' ``` - +```json {"outputs":[{"name":"OUTPUT1","datatype":"INT64","shape":["1"],"contents":{"int64Contents":["2"]}}],"rawOutputContents":["AgAAAAAAAAA="]} ``` @@ -295,7 +288,6 @@ seldon pipeline infer triggers_join_internal --inference-mode grpc '{request_str seldon pipeline unload triggers_join_internal ``` ```json - {} ``` @@ -305,7 +297,6 @@ seldon model unload id2_node seldon model unload join_node ``` ```json - {} {} {} diff --git a/scheduler/pkg/metrics/agent.go b/scheduler/pkg/metrics/agent.go index f8d1908554..c8efb7d9fd 100644 --- a/scheduler/pkg/metrics/agent.go +++ b/scheduler/pkg/metrics/agent.go @@ -14,27 +14,28 @@ import ( "google.golang.org/grpc/status" ) +// Model metrics const ( + modelHistogramName = "seldon_model_infer_api_seconds" + modelAggregateInferCounterName = "seldon_model_aggregate_infer_total" + modelAggregateInferLatencyCounterName = "seldon_model_aggregate_infer_seconds_total" +) - // start list of metrics - // Model metrics - ModelHistogramName = "seldon_model_infer_api_seconds" - ModelInferCounterName = "seldon_model_infer_total" - ModelInferLatencyCounterName = "seldon_model_infer_seconds_total" - ModelAggregateInferCounterName = "seldon_model_aggregate_infer_total" - ModelAggregateInferLatencyCounterName = "seldon_model_aggregate_infer_seconds_total" - // Agent metrics - CacheEvictCounterName = "seldon_cache_evict_count" - CacheMissCounterName = "seldon_cache_miss_count" - LoadModelCounterName = "seldon_load_model_counter" - UnloadModelCounterName = "seldon_unload_model_counter" - LoadedModelGaugeName = "seldon_loaded_model_gauge" - LoadedModelMemoryGaugeName = "seldon_loaded_model_memory_bytes_gauge" - EvictedModelMemoryGaugeName = "seldon_evicted_model_memory_bytes_gauge" - ServerReplicaMemoryCapacityGaugeName = "seldon_server_replica_memory_capacity_bytes_gauge" - ServerReplicaMemoryCapacityWithOverCommitGaugeName = "seldon_server_replica_memory_capacity_overcommit_bytes_gauge" - // end list of metrics +// Agent metrics +const ( + cacheEvictCounterName = "seldon_cache_evict_count" + cacheMissCounterName = "seldon_cache_miss_count" + loadModelCounterName = "seldon_load_model_counter" + unloadModelCounterName = "seldon_unload_model_counter" + loadedModelGaugeName = "seldon_loaded_model_gauge" + loadedModelMemoryGaugeName = "seldon_loaded_model_memory_bytes_gauge" + evictedModelMemoryGaugeName = "seldon_evicted_model_memory_bytes_gauge" + serverReplicaMemoryCapacityGaugeName = "seldon_server_replica_memory_capacity_bytes_gauge" + serverReplicaMemoryCapacityWithOverCommitGaugeName = "seldon_server_replica_memory_capacity_overcommit_bytes_gauge" +) +// Metric labels +const ( SeldonModelMetric = "model" SeldonInternalModelMetric = "model_internal" SeldonServerMetric = "server" @@ -65,8 +66,6 @@ type PrometheusMetrics struct { logger log.FieldLogger // Model metrics modelHistogram *prometheus.HistogramVec - modelInferCounter *prometheus.CounterVec - modelInferLatencyCounter *prometheus.CounterVec modelAggregateInferCounter *prometheus.CounterVec modelAggregateInferLatencyCounter *prometheus.CounterVec cacheEvictCounter *prometheus.CounterVec @@ -87,16 +86,6 @@ func NewPrometheusModelMetrics(serverName string, serverReplicaIdx uint, logger return nil, err } - inferCounter, err := createModelInferCounter() - if err != nil { - return nil, err - } - - inferLatencyCounter, err := createModelInferLatencyCounter() - if err != nil { - return nil, err - } - aggregateInferCounter, err := createModelAggregateInferCounter() if err != nil { @@ -158,8 +147,6 @@ func NewPrometheusModelMetrics(serverName string, serverReplicaIdx uint, logger serverReplicaIdx: fmt.Sprintf("%d", serverReplicaIdx), logger: logger.WithField("source", "PrometheusMetrics"), modelHistogram: histogram, - modelInferCounter: inferCounter, - modelInferLatencyCounter: inferLatencyCounter, modelAggregateInferCounter: aggregateInferCounter, modelAggregateInferLatencyCounter: aggregateInferLatencyCounter, cacheEvictCounter: cacheEvictCounter, @@ -180,7 +167,7 @@ func createModelHistogram() (*prometheus.HistogramVec, error) { histogram := prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: ModelHistogramName, + Name: modelHistogramName, Help: "A histogram of latencies for inference server", Buckets: DefaultHistogramBuckets, }, @@ -197,28 +184,10 @@ func createModelHistogram() (*prometheus.HistogramVec, error) { return histogram, nil } -func createModelInferCounter() (*prometheus.CounterVec, error) { - labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, SeldonModelMetric, SeldonInternalModelMetric, MethodTypeMetric, CodeMetric} - return createCounterVec( - ModelInferCounterName, - "A count of server inference calls", - labelNames, - ) -} - -func createModelInferLatencyCounter() (*prometheus.CounterVec, error) { - labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, SeldonModelMetric, SeldonInternalModelMetric, MethodTypeMetric, CodeMetric} - return createCounterVec( - ModelInferLatencyCounterName, - "A sum of server inference call latencies", - labelNames, - ) -} - func createModelAggregateInferCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, MethodTypeMetric} return createCounterVec( - ModelAggregateInferCounterName, + modelAggregateInferCounterName, "A count of server inference calls (aggregate)", labelNames, ) @@ -227,7 +196,7 @@ func createModelAggregateInferCounter() (*prometheus.CounterVec, error) { func createModelAggregateInferLatencyCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, MethodTypeMetric} return createCounterVec( - ModelAggregateInferLatencyCounterName, + modelAggregateInferLatencyCounterName, "A sum of server inference call latencies (aggregate)", labelNames, ) @@ -236,7 +205,7 @@ func createModelAggregateInferLatencyCounter() (*prometheus.CounterVec, error) { func createCacheEvictCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createCounterVec( - CacheEvictCounterName, + cacheEvictCounterName, "A count of model cache evict", labelNames, ) @@ -245,7 +214,7 @@ func createCacheEvictCounter() (*prometheus.CounterVec, error) { func createCacheMissCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createCounterVec( - CacheMissCounterName, + cacheMissCounterName, "A count of model cache miss", labelNames, ) @@ -254,7 +223,7 @@ func createCacheMissCounter() (*prometheus.CounterVec, error) { func createLoadModelCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createCounterVec( - LoadModelCounterName, + loadModelCounterName, "A count of model load", labelNames, ) @@ -263,7 +232,7 @@ func createLoadModelCounter() (*prometheus.CounterVec, error) { func createUnloadModelCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createCounterVec( - UnloadModelCounterName, + unloadModelCounterName, "A count of model unload", labelNames, ) @@ -272,7 +241,7 @@ func createUnloadModelCounter() (*prometheus.CounterVec, error) { func createLoadedModelGauge() (*prometheus.GaugeVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, SeldonInternalModelMetric} return createGaugeVec( - LoadedModelGaugeName, + loadedModelGaugeName, "A gauge of models loaded in the system", labelNames, ) @@ -281,7 +250,7 @@ func createLoadedModelGauge() (*prometheus.GaugeVec, error) { func createLoadedModelMemoryGauge() (*prometheus.GaugeVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, SeldonInternalModelMetric} return createGaugeVec( - LoadedModelMemoryGaugeName, + loadedModelMemoryGaugeName, "A gauge of models loaded memory in the system", labelNames, ) @@ -290,7 +259,7 @@ func createLoadedModelMemoryGauge() (*prometheus.GaugeVec, error) { func createEvictedModelMemoryGauge() (*prometheus.GaugeVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric, SeldonInternalModelMetric} return createGaugeVec( - EvictedModelMemoryGaugeName, + evictedModelMemoryGaugeName, "A gauge of models evicted from memory in the system", labelNames, ) @@ -299,7 +268,7 @@ func createEvictedModelMemoryGauge() (*prometheus.GaugeVec, error) { func createServerReplicaMemoryCapacityGauge() (*prometheus.GaugeVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createGaugeVec( - ServerReplicaMemoryCapacityGaugeName, + serverReplicaMemoryCapacityGaugeName, "A gauge of server replica memory capacity", labelNames, ) @@ -308,7 +277,7 @@ func createServerReplicaMemoryCapacityGauge() (*prometheus.GaugeVec, error) { func createServerReplicaMemoryCapacityWithOvercommitGauge() (*prometheus.GaugeVec, error) { labelNames := []string{SeldonServerMetric, SeldonServerReplicaMetric} return createGaugeVec( - ServerReplicaMemoryCapacityWithOverCommitGaugeName, + serverReplicaMemoryCapacityWithOverCommitGaugeName, "A gauge of server replica memory capacity with overcommit", labelNames, ) @@ -457,14 +426,6 @@ func (pm *PrometheusMetrics) addCacheEvictCount() { } func (pm *PrometheusMetrics) addInferCount(externalModelName, internalModelName, method string, code string) { - pm.modelInferCounter.With(prometheus.Labels{ - SeldonModelMetric: externalModelName, - SeldonInternalModelMetric: internalModelName, - SeldonServerMetric: pm.serverName, - SeldonServerReplicaMetric: pm.serverReplicaIdx, - MethodTypeMetric: method, - CodeMetric: code, - }).Inc() pm.modelAggregateInferCounter.With(prometheus.Labels{ SeldonServerMetric: pm.serverName, SeldonServerReplicaMetric: pm.serverReplicaIdx, @@ -473,14 +434,6 @@ func (pm *PrometheusMetrics) addInferCount(externalModelName, internalModelName, } func (pm *PrometheusMetrics) addInferLatency(externalModelName, internalModelName, method string, latency float64, code string) { - pm.modelInferLatencyCounter.With(prometheus.Labels{ - SeldonModelMetric: externalModelName, - SeldonInternalModelMetric: internalModelName, - SeldonServerMetric: pm.serverName, - SeldonServerReplicaMetric: pm.serverReplicaIdx, - MethodTypeMetric: method, - CodeMetric: code, - }).Add(latency) pm.modelAggregateInferLatencyCounter.With(prometheus.Labels{ SeldonServerMetric: pm.serverName, SeldonServerReplicaMetric: pm.serverReplicaIdx, diff --git a/scheduler/pkg/metrics/agent_test.go b/scheduler/pkg/metrics/agent_test.go index e11e6418ae..4990249ece 100644 --- a/scheduler/pkg/metrics/agent_test.go +++ b/scheduler/pkg/metrics/agent_test.go @@ -168,28 +168,6 @@ func TestInferModelMetrics(t *testing.T) { promMetrics.AddModelInferMetrics(modelName, modelName, method, latency, "200") actualVal := testutil.ToFloat64( - promMetrics.modelInferLatencyCounter.With(prometheus.Labels{ - SeldonModelMetric: modelName, - SeldonInternalModelMetric: modelName, - MethodTypeMetric: method, - SeldonServerMetric: serverName, - SeldonServerReplicaMetric: strconv.Itoa(serverIdx), - CodeMetric: "200", - })) - g.Expect(latency).To(Equal(actualVal)) - - actualVal = testutil.ToFloat64( - promMetrics.modelInferCounter.With(prometheus.Labels{ - SeldonModelMetric: modelName, - SeldonInternalModelMetric: modelName, - MethodTypeMetric: method, - SeldonServerMetric: serverName, - SeldonServerReplicaMetric: strconv.Itoa(serverIdx), - CodeMetric: "200", - })) - g.Expect(float64(1)).To(Equal(actualVal)) - - actualVal = testutil.ToFloat64( promMetrics.modelAggregateInferLatencyCounter.With(prometheus.Labels{ MethodTypeMetric: method, SeldonServerMetric: serverName, @@ -205,8 +183,6 @@ func TestInferModelMetrics(t *testing.T) { })) g.Expect(float64(1)).To(Equal(actualVal)) - promMetrics.modelInferLatencyCounter.Reset() - promMetrics.modelInferCounter.Reset() promMetrics.modelAggregateInferLatencyCounter.Reset() promMetrics.modelAggregateInferCounter.Reset() } diff --git a/scheduler/pkg/metrics/gateway.go b/scheduler/pkg/metrics/gateway.go index f03acfaeaa..34cf705752 100644 --- a/scheduler/pkg/metrics/gateway.go +++ b/scheduler/pkg/metrics/gateway.go @@ -11,17 +11,18 @@ import ( log "github.com/sirupsen/logrus" ) +// Pipeline metrics +// +// The aggregate metrics exist for efficiency, as the summation can be +// very slow in Prometheus when many pipelines exist. const ( + pipelineHistogramName = "seldon_pipeline_infer_api_seconds" + pipelineAggregateInferCounterName = "seldon_pipeline_aggregate_infer_total" + pipelineAggregateInferLatencyCounterName = "seldon_pipeline_aggregate_infer_seconds_total" +) - // start list of metrics - // Pipeline metrics - PipelineHistogramName = "seldon_pipeline_infer_api_seconds" - PipelineInferCounterName = "seldon_pipeline_infer_total" - PipelineInferLatencyCounterName = "seldon_pipeline_infer_seconds_total" - PipelineAggregateInferCounterName = "seldon_pipeline_aggregate_infer_total" - PipelineAggregateInferLatencyCounterName = "seldon_pipeline_aggregate_infer_seconds_total" - // end list of metrics - +// Metric labels +const ( SeldonPipelineMetric = "pipeline" ) @@ -35,8 +36,6 @@ type PrometheusPipelineMetrics struct { logger log.FieldLogger // Model metrics pipelineHistogram *prometheus.HistogramVec - pipelineInferCounter *prometheus.CounterVec - pipelineInferLatencyCounter *prometheus.CounterVec pipelineAggregateInferCounter *prometheus.CounterVec pipelineAggregateInferLatencyCounter *prometheus.CounterVec @@ -49,16 +48,6 @@ func NewPrometheusPipelineMetrics(logger log.FieldLogger) (*PrometheusPipelineMe return nil, err } - inferCounter, err := createPipelineInferCounter() - if err != nil { - return nil, err - } - - inferLatencyCounter, err := createPipelineInferLatencyCounter() - if err != nil { - return nil, err - } - aggregateInferCounter, err := createPipelineAggregateInferCounter() if err != nil { @@ -74,8 +63,6 @@ func NewPrometheusPipelineMetrics(logger log.FieldLogger) (*PrometheusPipelineMe serverName: "pipeline-gateway", logger: logger.WithField("source", "PrometheusMetrics"), pipelineHistogram: histogram, - pipelineInferCounter: inferCounter, - pipelineInferLatencyCounter: inferLatencyCounter, pipelineAggregateInferCounter: aggregateInferCounter, pipelineAggregateInferLatencyCounter: aggregateInferLatencyCounter, }, nil @@ -87,7 +74,7 @@ func createPipelineHistogram() (*prometheus.HistogramVec, error) { histogram := prometheus.NewHistogramVec( prometheus.HistogramOpts{ - Name: PipelineHistogramName, + Name: pipelineHistogramName, Help: "A histogram of latencies for pipeline gateway", Buckets: DefaultHistogramBuckets, }, @@ -104,28 +91,10 @@ func createPipelineHistogram() (*prometheus.HistogramVec, error) { return histogram, nil } -func createPipelineInferCounter() (*prometheus.CounterVec, error) { - labelNames := []string{SeldonServerMetric, SeldonPipelineMetric, MethodTypeMetric, CodeMetric} - return createCounterVec( - PipelineInferCounterName, - "A count of pipeline gateway calls", - labelNames, - ) -} - -func createPipelineInferLatencyCounter() (*prometheus.CounterVec, error) { - labelNames := []string{SeldonServerMetric, SeldonPipelineMetric, MethodTypeMetric, CodeMetric} - return createCounterVec( - PipelineInferLatencyCounterName, - "A sum of pipeline gateway call latencies", - labelNames, - ) -} - func createPipelineAggregateInferCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, MethodTypeMetric} return createCounterVec( - PipelineAggregateInferCounterName, + pipelineAggregateInferCounterName, "A count of pipeline gateway calls (aggregate)", labelNames, ) @@ -134,7 +103,7 @@ func createPipelineAggregateInferCounter() (*prometheus.CounterVec, error) { func createPipelineAggregateInferLatencyCounter() (*prometheus.CounterVec, error) { labelNames := []string{SeldonServerMetric, MethodTypeMetric} return createCounterVec( - PipelineAggregateInferLatencyCounterName, + pipelineAggregateInferLatencyCounterName, "A sum of pipeline gateway call latencies (aggregate)", labelNames, ) @@ -150,12 +119,6 @@ func (pm *PrometheusPipelineMetrics) AddPipelineInferMetrics(pipelineName string } func (pm *PrometheusPipelineMetrics) addInferCount(pipelineName, method string, code string) { - pm.pipelineInferCounter.With(prometheus.Labels{ - SeldonPipelineMetric: pipelineName, - SeldonServerMetric: pm.serverName, - MethodTypeMetric: method, - CodeMetric: code, - }).Inc() pm.pipelineAggregateInferCounter.With(prometheus.Labels{ SeldonServerMetric: pm.serverName, MethodTypeMetric: method, @@ -163,12 +126,6 @@ func (pm *PrometheusPipelineMetrics) addInferCount(pipelineName, method string, } func (pm *PrometheusPipelineMetrics) addInferLatency(pipelineName, method string, latency float64, code string) { - pm.pipelineInferLatencyCounter.With(prometheus.Labels{ - SeldonPipelineMetric: pipelineName, - SeldonServerMetric: pm.serverName, - MethodTypeMetric: method, - CodeMetric: code, - }).Add(latency) pm.pipelineAggregateInferLatencyCounter.With(prometheus.Labels{ SeldonServerMetric: pm.serverName, MethodTypeMetric: method,