From 8553f1822a45a4cc9f489a6114c3786a35f8f314 Mon Sep 17 00:00:00 2001 From: Pete MacKinnon Date: Tue, 20 Sep 2022 13:59:12 -0700 Subject: [PATCH] Wholesale updates for tritonserver version --- README.md | 2 +- .../source/developer_guide/guides/2_real_world_phishing.md | 2 +- examples/abp_nvsmi_detection/README.md | 4 ++-- examples/abp_pcap_detection/README.md | 4 ++-- examples/log_parsing/README.md | 4 ++-- examples/nlp_si_detection/README.md | 2 +- examples/ransomware_detection/README.md | 4 ++-- examples/sid_visualization/docker-compose.yml | 2 +- models/triton-model-repo/README.md | 7 ++----- scripts/validation/kafka_testing.md | 6 +++--- scripts/validation/val-globals.sh | 2 +- scripts/validation/val-utils.sh | 2 +- tests/benchmarks/README.md | 4 ++-- 13 files changed, 21 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index f9ae8772a9..b3cead28c0 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ Use the following command to launch a Docker container for Triton loading all of ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ -v $PWD/models:/models \ - nvcr.io/nvidia/tritonserver:22.06-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --log-info=true \ diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index c69f5720c7..a204f6e76d 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -149,7 +149,7 @@ Note: This step assumes you have both [Docker](https://docs.docker.com/engine/in From the root of the Morpheus project we will launch a Triton Docker container with the `models` directory mounted into the container: ```shell -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --log-info=true +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --log-info=true ``` Once we have Triton running, we can verify that it is healthy using [curl](https://curl.se/). The `/v2/health/live` endpoint should return a 200 status code: diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 06277587e8..ffc42c0464 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -65,12 +65,12 @@ This example utilizes the Triton Inference Server to perform inference. 
Pull the Docker image for Triton: ```bash -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` From the Morpheus repo root directory, run the following to launch Triton and load the `abp-nvsmi-xgb` XGBoost model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb ``` This will launch Triton and only load the `abp-nvsmi-xgb` model. This model has been configured with a max batch size of 32768, and to use dynamic batching for increased performance. diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index e0963b2929..e994266d20 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -23,7 +23,7 @@ To run this example, an instance of Triton Inference Server and a sample dataset ### Triton Inference Server ```bash -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Deploy Triton Inference Server @@ -35,7 +35,7 @@ Bind the provided `abp-pcap-xgb` directory to the docker container model repo at cd /examples/abp_pcap_detection # Launch the container -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models --exit-on-error=false --model-control-mode=poll --repository-poll-secs=30 +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models --exit-on-error=false --model-control-mode=poll --repository-poll-secs=30 ``` ##### Verify Model Deployment diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index 2a14a715f6..88b67160cc 100644 --- a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -26,14 +26,14 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container ``` cd ${MORPHEUS_ROOT}/models -docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model log-parsing-onnx +docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model log-parsing-onnx ``` ##### Verify Model Deployment diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index d267428e91..9f87beff2d 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -77,7 +77,7 @@ This example utilizes the Triton Inference Server to perform inference. 
The neur From the Morpheus repo root directory, run the following to launch Triton and load the `sid-minibert` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` Where `22.02-py3` can be replaced with the current year and month of the Triton version to use. For example, to use May 2021, specify `nvcr.io/nvidia/tritonserver:21.05-py3`. Ensure that the version of TensorRT that is used in Triton matches the version of TensorRT elsewhere (see [NGC Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)). diff --git a/examples/ransomware_detection/README.md b/examples/ransomware_detection/README.md index 0bcbcbcc8c..174a1f1b14 100644 --- a/examples/ransomware_detection/README.md +++ b/examples/ransomware_detection/README.md @@ -27,7 +27,7 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.06-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container @@ -35,7 +35,7 @@ docker pull nvcr.io/nvidia/tritonserver:22.06-py3 cd ${MORPHEUS_ROOT}/examples/ransomware_detection # Run Triton in explicit mode -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:22.06-py3 \ +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ diff --git a/examples/sid_visualization/docker-compose.yml b/examples/sid_visualization/docker-compose.yml index f123f0d867..f86cfd9294 100644 --- a/examples/sid_visualization/docker-compose.yml +++ b/examples/sid_visualization/docker-compose.yml @@ -25,7 +25,7 @@ x-with-gpus: &with_gpus services: triton: - image: nvcr.io/nvidia/tritonserver:22.06-py3 + image: nvcr.io/nvidia/tritonserver:22.08-py3 <<: *with_gpus command: "tritonserver --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx --model-repository=/models/triton-model-repo" environment: diff --git a/models/triton-model-repo/README.md b/models/triton-model-repo/README.md index ec866e574b..bed4ae533a 100644 --- a/models/triton-model-repo/README.md +++ b/models/triton-model-repo/README.md @@ -27,7 +27,7 @@ To launch Triton with one of the models in `triton-model-repo`, this entire repo ### Load `sid-minibert-onnx` Model with Default Triton Image ```bash -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver nvcr.io/nvidia/tritonserver:21.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver 
--model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` ### Load `abp-nvsmi-xgb` Model with FIL Backend Triton @@ -36,9 +36,6 @@ docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/model docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver triton_fil tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb ``` -Note: The FIL Backend Triton image was built with `docker build -t triton_fil -f ops/Dockerfile .`. Adjust the image name as necessary. - - ### Load `sid-minibert-trt` Model with Default Triton Image from Morpheus Repo To load a TensorRT model, it first must be compiled with the `morpheus tools onnx-to-trt` utility (See `triton-model-repo/sid-minibert-trt/1/README.md` for more info): @@ -51,5 +48,5 @@ morpheus tools onnx-to-trt --input_model ../../sid-minibert-onnx/1/sid-minibert. Then launch Triton: ```bash -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/models:/models --name tritonserver nvcr.io/nvidia/tritonserver:21.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-trt +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/models:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-trt ``` diff --git a/scripts/validation/kafka_testing.md b/scripts/validation/kafka_testing.md index a870608c61..e210ee825f 100644 --- a/scripts/validation/kafka_testing.md +++ b/scripts/validation/kafka_testing.md @@ -171,7 +171,7 @@ For this test we are going to replace the from & to file stages from the ABP val 1. In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ @@ -338,7 +338,7 @@ For this test we are going to replace the from & to file stages from the Phishin 1. In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ @@ -411,7 +411,7 @@ Note: Due to the complexity of the input data and a limitation of the cudf reade 1. 
In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ diff --git a/scripts/validation/val-globals.sh b/scripts/validation/val-globals.sh index 86d583e1c1..6439fca48a 100755 --- a/scripts/validation/val-globals.sh +++ b/scripts/validation/val-globals.sh @@ -26,7 +26,7 @@ export e="\033[0;90m" export y="\033[0;33m" export x="\033[0m" -export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.06-py3"} +export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.08-py3"} # TRITON_GRPC_PORT is only used when TRITON_URL is undefined export TRITON_GRPC_PORT=${TRITON_GRPC_PORT:-"8001"} diff --git a/scripts/validation/val-utils.sh b/scripts/validation/val-utils.sh index c112453655..dce6fc4afa 100755 --- a/scripts/validation/val-utils.sh +++ b/scripts/validation/val-utils.sh @@ -68,7 +68,7 @@ function wait_for_triton { function ensure_triton_running { - TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.06-py3"} + TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.08-py3"} IS_RUNNING=$(is_triton_running) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index c0ac40ed96..4620c1f9b0 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -24,14 +24,14 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container ``` cd ${MORPHEUS_ROOT}/models -docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx +docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx ``` ##### Verify Model Deployments
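
As a quick sanity check after launching any of the updated `22.08-py3` containers above, the following is a minimal sketch for verifying the deployment, assuming Triton's standard HTTP endpoints are exposed on port 8000 (as in the `-p8000:8000` mappings used throughout) and that the model names match those passed via `--load-model` (e.g. `sid-minibert-onnx`, `abp-nvsmi-xgb`, `phishing-bert-onnx`):

```bash
# Liveness: should return HTTP 200 once the server is up
curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/live

# Readiness: 200 indicates the server and its loaded models are ready
curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/ready

# Per-model readiness for the models loaded with --load-model
for model in sid-minibert-onnx abp-nvsmi-xgb phishing-bert-onnx; do
  echo -n "${model}: "
  curl -s -o /dev/null -w "%{http_code}\n" "localhost:8000/v2/models/${model}/ready"
done
```

Any response other than 200 generally means the server or a model failed to load; the Triton log (enabled with `--log-info=true` in the commands above) shows the reason.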