From 8553f1822a45a4cc9f489a6114c3786a35f8f314 Mon Sep 17 00:00:00 2001 From: Pete MacKinnon Date: Tue, 20 Sep 2022 13:59:12 -0700 Subject: [PATCH] Wholesale updates for tritonserver version --- README.md | 2 +- .../source/developer_guide/guides/2_real_world_phishing.md | 2 +- examples/abp_nvsmi_detection/README.md | 4 ++-- examples/abp_pcap_detection/README.md | 4 ++-- examples/log_parsing/README.md | 4 ++-- examples/nlp_si_detection/README.md | 2 +- examples/ransomware_detection/README.md | 4 ++-- examples/sid_visualization/docker-compose.yml | 2 +- models/triton-model-repo/README.md | 7 ++----- scripts/validation/kafka_testing.md | 6 +++--- scripts/validation/val-globals.sh | 2 +- scripts/validation/val-utils.sh | 2 +- tests/benchmarks/README.md | 4 ++-- 13 files changed, 21 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index f9ae8772a9..b3cead28c0 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,7 @@ Use the following command to launch a Docker container for Triton loading all of ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 \ -v $PWD/models:/models \ - nvcr.io/nvidia/tritonserver:22.06-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --log-info=true \ diff --git a/docs/source/developer_guide/guides/2_real_world_phishing.md b/docs/source/developer_guide/guides/2_real_world_phishing.md index c69f5720c7..a204f6e76d 100644 --- a/docs/source/developer_guide/guides/2_real_world_phishing.md +++ b/docs/source/developer_guide/guides/2_real_world_phishing.md @@ -149,7 +149,7 @@ Note: This step assumes you have both [Docker](https://docs.docker.com/engine/in From the root of the Morpheus project we will launch a Triton Docker container with the `models` directory mounted into the container: ```shell -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --log-info=true +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --log-info=true ``` Once we have Triton running, we can verify that it is healthy using [curl](https://curl.se/). The `/v2/health/live` endpoint should return a 200 status code: diff --git a/examples/abp_nvsmi_detection/README.md b/examples/abp_nvsmi_detection/README.md index 06277587e8..ffc42c0464 100644 --- a/examples/abp_nvsmi_detection/README.md +++ b/examples/abp_nvsmi_detection/README.md @@ -65,12 +65,12 @@ This example utilizes the Triton Inference Server to perform inference. 
Pull the Docker image for Triton: ```bash -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` From the Morpheus repo root directory, run the following to launch Triton and load the `abp-nvsmi-xgb` XGBoost model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb ``` This will launch Triton and only load the `abp-nvsmi-xgb` model. This model has been configured with a max batch size of 32768, and to use dynamic batching for increased performance. diff --git a/examples/abp_pcap_detection/README.md b/examples/abp_pcap_detection/README.md index e0963b2929..e994266d20 100644 --- a/examples/abp_pcap_detection/README.md +++ b/examples/abp_pcap_detection/README.md @@ -23,7 +23,7 @@ To run this example, an instance of Triton Inference Server and a sample dataset ### Triton Inference Server ```bash -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Deploy Triton Inference Server @@ -35,7 +35,7 @@ Bind the provided `abp-pcap-xgb` directory to the docker container model repo at cd /examples/abp_pcap_detection # Launch the container -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models --exit-on-error=false --model-control-mode=poll --repository-poll-secs=30 +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/abp-pcap-xgb:/models/abp-pcap-xgb --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models --exit-on-error=false --model-control-mode=poll --repository-poll-secs=30 ``` ##### Verify Model Deployment diff --git a/examples/log_parsing/README.md b/examples/log_parsing/README.md index 2a14a715f6..88b67160cc 100644 --- a/examples/log_parsing/README.md +++ b/examples/log_parsing/README.md @@ -26,14 +26,14 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container ``` cd ${MORPHEUS_ROOT}/models -docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model log-parsing-onnx +docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model log-parsing-onnx ``` ##### Verify Model Deployment diff --git a/examples/nlp_si_detection/README.md b/examples/nlp_si_detection/README.md index d267428e91..9f87beff2d 100644 --- a/examples/nlp_si_detection/README.md +++ b/examples/nlp_si_detection/README.md @@ -77,7 +77,7 @@ This example utilizes the Triton Inference Server to perform inference. 
The neur From the Morpheus repo root directory, run the following to launch Triton and load the `sid-minibert` model: ```bash -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.02-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` Where `22.02-py3` can be replaced with the current year and month of the Triton version to use. For example, to use May 2021, specify `nvcr.io/nvidia/tritonserver:21.05-py3`. Ensure that the version of TensorRT that is used in Triton matches the version of TensorRT elsewhere (see [NGC Deep Learning Frameworks Support Matrix](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)). diff --git a/examples/ransomware_detection/README.md b/examples/ransomware_detection/README.md index 0bcbcbcc8c..174a1f1b14 100644 --- a/examples/ransomware_detection/README.md +++ b/examples/ransomware_detection/README.md @@ -27,7 +27,7 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.06-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container @@ -35,7 +35,7 @@ docker pull nvcr.io/nvidia/tritonserver:22.06-py3 cd ${MORPHEUS_ROOT}/examples/ransomware_detection # Run Triton in explicit mode -docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:22.06-py3 \ +docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD/models:/models/triton-model-repo nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ diff --git a/examples/sid_visualization/docker-compose.yml b/examples/sid_visualization/docker-compose.yml index f123f0d867..f86cfd9294 100644 --- a/examples/sid_visualization/docker-compose.yml +++ b/examples/sid_visualization/docker-compose.yml @@ -25,7 +25,7 @@ x-with-gpus: &with_gpus services: triton: - image: nvcr.io/nvidia/tritonserver:22.06-py3 + image: nvcr.io/nvidia/tritonserver:22.08-py3 <<: *with_gpus command: "tritonserver --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx --model-repository=/models/triton-model-repo" environment: diff --git a/models/triton-model-repo/README.md b/models/triton-model-repo/README.md index ec866e574b..bed4ae533a 100644 --- a/models/triton-model-repo/README.md +++ b/models/triton-model-repo/README.md @@ -27,7 +27,7 @@ To launch Triton with one of the models in `triton-model-repo`, this entire repo ### Load `sid-minibert-onnx` Model with Default Triton Image ```bash -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver nvcr.io/nvidia/tritonserver:21.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver 
--model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-onnx ``` ### Load `abp-nvsmi-xgb` Model with FIL Backend Triton @@ -36,9 +36,6 @@ docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/model docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD:/models --name tritonserver triton_fil tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model abp-nvsmi-xgb ``` -Note: The FIL Backend Triton image was built with `docker build -t triton_fil -f ops/Dockerfile .`. Adjust the image name as necessary. - - ### Load `sid-minibert-trt` Model with Default Triton Image from Morpheus Repo To load a TensorRT model, it first must be compiled with the `morpheus tools onnx-to-trt` utility (See `triton-model-repo/sid-minibert-trt/1/README.md` for more info): @@ -51,5 +48,5 @@ morpheus tools onnx-to-trt --input_model ../../sid-minibert-onnx/1/sid-minibert. Then launch Triton: ```bash -docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/models:/models --name tritonserver nvcr.io/nvidia/tritonserver:21.06-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-trt +docker run --rm --gpus=all -p 8000:8000 -p 8001:8001 -p 8002:8002 -v $PWD/models:/models --name tritonserver nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --exit-on-error=false --model-control-mode=explicit --load-model sid-minibert-trt ``` diff --git a/scripts/validation/kafka_testing.md b/scripts/validation/kafka_testing.md index a870608c61..e210ee825f 100644 --- a/scripts/validation/kafka_testing.md +++ b/scripts/validation/kafka_testing.md @@ -171,7 +171,7 @@ For this test we are going to replace the from & to file stages from the ABP val 1. In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ @@ -338,7 +338,7 @@ For this test we are going to replace the from & to file stages from the Phishin 1. In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ @@ -411,7 +411,7 @@ Note: Due to the complexity of the input data and a limitation of the cudf reade 1. 
In a new terminal launch Triton: ```bash docker run --rm -ti --gpus=all -p8000:8000 -p8001:8001 -p8002:8002 -v ${MORPHEUS_ROOT}/models:/models \ - nvcr.io/nvidia/tritonserver:22.02-py3 \ + nvcr.io/nvidia/tritonserver:22.08-py3 \ tritonserver --model-repository=/models/triton-model-repo \ --exit-on-error=false \ --model-control-mode=explicit \ diff --git a/scripts/validation/val-globals.sh b/scripts/validation/val-globals.sh index 86d583e1c1..6439fca48a 100755 --- a/scripts/validation/val-globals.sh +++ b/scripts/validation/val-globals.sh @@ -26,7 +26,7 @@ export e="\033[0;90m" export y="\033[0;33m" export x="\033[0m" -export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.06-py3"} +export TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.08-py3"} # TRITON_GRPC_PORT is only used when TRITON_URL is undefined export TRITON_GRPC_PORT=${TRITON_GRPC_PORT:-"8001"} diff --git a/scripts/validation/val-utils.sh b/scripts/validation/val-utils.sh index c112453655..dce6fc4afa 100755 --- a/scripts/validation/val-utils.sh +++ b/scripts/validation/val-utils.sh @@ -68,7 +68,7 @@ function wait_for_triton { function ensure_triton_running { - TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.06-py3"} + TRITON_IMAGE=${TRITON_IMAGE:-"nvcr.io/nvidia/tritonserver:22.08-py3"} IS_RUNNING=$(is_triton_running) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index c0ac40ed96..4620c1f9b0 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -24,14 +24,14 @@ Pull Docker image from NGC (https://ngc.nvidia.com/catalog/containers/nvidia:tri Example: ``` -docker pull nvcr.io/nvidia/tritonserver:22.02-py3 +docker pull nvcr.io/nvidia/tritonserver:22.08-py3 ``` ##### Start Triton Inference Server container ``` cd ${MORPHEUS_ROOT}/models -docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.06-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx +docker run --gpus=1 --rm -p8000:8000 -p8001:8001 -p8002:8002 -v $PWD:/models nvcr.io/nvidia/tritonserver:22.08-py3 tritonserver --model-repository=/models/triton-model-repo --model-control-mode=explicit --load-model sid-minibert-onnx --load-model abp-nvsmi-xgb --load-model phishing-bert-onnx ``` ##### Verify Model Deployments
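
As a quick sanity check after launching any of the updated `22.08-py3` containers above, the following is a minimal sketch for verifying the deployment, assuming Triton's standard HTTP endpoints are exposed on port 8000 (as in the `-p8000:8000` mappings used throughout) and that the model names match those passed via `--load-model` (e.g. `sid-minibert-onnx`, `abp-nvsmi-xgb`, `phishing-bert-onnx`):

```bash
# Liveness: should return HTTP 200 once the server is up
curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/live

# Readiness: 200 indicates the server and its loaded models are ready
curl -s -o /dev/null -w "%{http_code}\n" localhost:8000/v2/health/ready

# Per-model readiness for the models loaded with --load-model
for model in sid-minibert-onnx abp-nvsmi-xgb phishing-bert-onnx; do
  echo -n "${model}: "
  curl -s -o /dev/null -w "%{http_code}\n" "localhost:8000/v2/models/${model}/ready"
done
```

Any response other than 200 generally means the server or a model failed to load; the Triton log (enabled with `--log-info=true` in the commands above) shows the reason.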