From ba98262ae18b55ff8cadd1d121349ab94db9f633 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Fri, 6 Sep 2024 22:18:10 -0400 Subject: [PATCH 01/10] Module 5 --- .github/workflows/module-5.yaml | 108 +++++++++++++++++++++----------- module-5/Dockerfile | 47 +++++++------- module-5/Makefile | 2 +- module-5/PRACTICE.md | 25 +++----- module-5/README.md | 26 ++++---- module-5/requirements.txt | 16 +++-- module-5/serving/predictor.py | 4 +- 7 files changed, 131 insertions(+), 97 deletions(-) diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 38d01b5..532509f 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -1,7 +1,15 @@ name: Module 5 on: - workflow_dispatch: + push: + branches: + - main + + pull_request: + branches: + - main + # paths: + # - 'module-4/**' jobs: build: @@ -10,44 +18,74 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Login to Docker Hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} + # - name: Login to Docker Hub + # uses: docker/login-action@v1 + # with: + # username: ${{ secrets.DOCKER_HUB_USERNAME }} + # password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 + # - name: Set up Docker Buildx + # uses: docker/setup-buildx-action@v1 - - name: Build app streamlit - uses: docker/build-push-action@v2 - with: - context: week-5/ - file: week-5/Dockerfile - push: true - target: app-streamlit - tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:latest - cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache,mode=max + # - name: Build app streamlit + # uses: docker/build-push-action@v2 + # with: + # context: week-5/ + # file: week-5/Dockerfile + # push: true + # target: app-streamlit + # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:latest + # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache + # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache,mode=max + + # - name: Build app fastapi + # uses: docker/build-push-action@v2 + # with: + # context: week-5/ + # file: week-5/Dockerfile + # push: true + # target: app-fastapi + # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:latest + # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache + # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache,mode=max - - name: Build app fastapi - uses: docker/build-push-action@v2 + # - name: Build app seldon + # uses: docker/build-push-action@v2 + # with: + # context: week-5/ + # file: week-5/Dockerfile + # push: true + # target: app-seldon + # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest + # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache + # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max + + + streamlit-docker: + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Log in to the Container registry + uses: docker/login-action@v3 with: - context: week-5/ - file: week-5/Dockerfile - push: true - target: app-fastapi - tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:latest - cache-from: 
type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache,mode=max + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + # - name: Extract metadata (tags, labels) for Docker + # id: meta + # uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 + # with: + # images: ghcr.io/kyryl-opens-ml/app-streamlit - - name: Build app seldon - uses: docker/build-push-action@v2 + - name: Build and push Docker image + uses: docker/build-push-action@v6 with: - context: week-5/ - file: week-5/Dockerfile + context: module-5/ push: true - target: app-seldon - tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest - cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max + target: app-streamlit + tags: ghcr.io/kyryl-opens-ml/app-streamlit:latest diff --git a/module-5/Dockerfile b/module-5/Dockerfile index d35929c..11ae028 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -1,4 +1,3 @@ -# FROM huggingface/transformers-pytorch-gpu:4.22.1 as base FROM huggingface/transformers-pytorch-gpu:4.35.2 as base WORKDIR /app @@ -23,34 +22,34 @@ FROM base AS app-streamlit CMD streamlit run --server.address 0.0.0.0 --server.port 8080 serving/ui_app.py -# Fast API docker image -FROM base AS app-fastapi -CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app +# # Fast API docker image +# FROM base AS app-fastapi +# CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app -FROM base AS app-seldon +# FROM base AS app-seldon -# Port for GRPC -EXPOSE 5000 -# Port for REST -EXPOSE 9000 +# # Port for GRPC +# EXPOSE 5000 +# # Port for REST +# EXPOSE 9000 -# Define environment variables -ENV MODEL_NAME SeldonAPI -ENV SERVICE_TYPE MODEL -# COPY /app/serving/seldon_api.py /app/SeldonAPI.py -COPY serving/seldon_api.py /app/SeldonAPI.py +# # Define environment variables +# ENV MODEL_NAME SeldonAPI +# ENV SERVICE_TYPE MODEL +# # COPY /app/serving/seldon_api.py /app/SeldonAPI.py +# COPY serving/seldon_api.py /app/SeldonAPI.py -RUN chown -R 8888 /app -RUN mkdir /.cache -RUN chmod 777 /.cache -RUN mkdir /.config -RUN chmod 777 /.config +# RUN chown -R 8888 /app +# RUN mkdir /.cache +# RUN chmod 777 /.cache +# RUN mkdir /.config +# RUN chmod 777 /.config -CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE +# CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE -FROM base AS app-kserve -ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 -RUN pip install protobuf==3.20.* -ENTRYPOINT ["python", "serving/kserve_api.py"] +# FROM base AS app-kserve +# ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 +# RUN pip install protobuf==3.20.* +# ENTRYPOINT ["python", "serving/kserve_api.py"] diff --git a/module-5/Makefile b/module-5/Makefile index d59b0fa..fac3443 100644 --- a/module-5/Makefile +++ b/module-5/Makefile @@ -2,7 +2,7 @@ build_all: docker build -f Dockerfile -t all:latest --target app-seldon . build_app_streamlit: - docker build -f Dockerfile -t app-streamlit:latest --target app-streamlit . + docker build -f Dockerfile -t app-streamlit:latest --target app-streamlit . 
run_app_streamlit: build_app_streamlit docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-streamlit:latest diff --git a/module-5/PRACTICE.md b/module-5/PRACTICE.md index 097391d..90d2448 100644 --- a/module-5/PRACTICE.md +++ b/module-5/PRACTICE.md @@ -1,11 +1,10 @@ -# Practice - -*** +# Practice +*** # H9: API serving -## Reading list: +## Reading list: - [CS 329S Lecture 8. Model Deployment](https://docs.google.com/document/d/1hNuW6bqWYZjlwpit_8W1cu7kllb-jTfy3Liof1GJWug/edit#heading=h.kp1fg79091xd) - [Machine Learning Systems Design](https://docs.google.com/presentation/d/1U_zKs19VLJKnGE02JDRnzxJ8lgeVF22WSZ_GrA646fY/edit#slide=id.p) @@ -16,7 +15,6 @@ - [Gradio Quickstart](https://www.gradio.app/guides/quickstart) - [Top 6 Kubernetes Deployment Strategies and How to Choose](https://codefresh.io/learn/kubernetes-deployment/top-6-kubernetes-deployment-strategies-and-how-to-choose/) - ## Task: - PR1: Write a Streamlit UI for serving your model, with tests and CI integration. @@ -24,14 +22,13 @@ - PR3: Write a FastAPI server for your model, with tests and CI integration. - PR4: Write a Kubernetes deployment YAML (Deployment, Service) for your model's API. - PR5: Write a Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio). -- Google doc update with a model serving plan for your ML model. +- Google doc update with a model serving plan for your ML model. -## Criteria: +## Criteria: -- 5 PRs merged +- 5 PRs merged - Serving plan in the google doc. - # H10: Inference servers ## Reading list: @@ -52,17 +49,15 @@ ## Task: - - PR1: Write code for Seldon API deployment of your model, including tests. - PR2: Write code for KServe API integration with your model, including tests. - PR3: Write code for Triton Inference Server deployment, incorporating tests. - PR4: Write code for Ray deployment, complete with tests. -- PR5: Write code for LLM deployment using TGI, vLLM, and LoRAX. -- PR6: Write code for LLM deployment with ModalLab. +- PR5 (optional): Write code for LLM deployment using TGI, vLLM, and LoRAX. +- PR6 (optional): Write code for LLM deployment with ModalLab. - Update the Google document on model serving, outlining options and comparisons between custom servers and inference servers. Decide and explain which solution you will use and why. - ## Criteria: -- 6 PRs merged -- Serving comparisons and conclusion in the google doc. +- 6 PRs merged +- Serving comparisons and conclusion in the google doc. 
\ No newline at end of file diff --git a/module-5/README.md b/module-5/README.md index 481f8ba..c609e3c 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -2,29 +2,29 @@ ![alt text](./../docs/serving.jpg) -# Practice +# Practice [Practice task](./PRACTICE.md) -*** +*** # Reference implementation -*** +*** -# Setup +# Setup -Create kind cluster +Create kind cluster -``` -kind create cluster --name ml-in-production-course-week-5 +```bash +kind create cluster --name ml-in-production ``` -Run k9s +Run k9s -``` +```bash k9s -A ``` @@ -33,7 +33,7 @@ k9s -A ``` -export WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 +export WANDB_API_KEY='put your key' ``` @@ -84,7 +84,7 @@ http POST http://0.0.0.0:8080/predict < samples.json pytest -ss ./tests ``` -# Triton +# Triton Inference Server ``` @@ -102,6 +102,10 @@ tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example - https://github.com/triton-inference-server/fastertransformer_backend - https://github.com/triton-inference-server/fastertransformer_backend + + + + # LLMs diff --git a/module-5/requirements.txt b/module-5/requirements.txt index 23fdb2f..94155fd 100644 --- a/module-5/requirements.txt +++ b/module-5/requirements.txt @@ -1,12 +1,10 @@ -gunicorn==22.0.0 -streamlit==1.36.0 -uvicorn==0.24.0.post1 -fastapi==0.111.0 -transformers==4.42.3 -datasets==2.14.6 -typer==0.9.0 -wandb==0.16.1 -kserve +transformers==4.44.2 +gunicorn==23.0.0 +streamlit==1.38.0 +uvicorn==0.21.1 +fastapi==0.109.2 +wandb==0.17.9 +kserve # seldon-core==1.14.1 # # kserve==0.10.1 # # ray==2.0.0 \ No newline at end of file diff --git a/module-5/serving/predictor.py b/module-5/serving/predictor.py index e956c50..670230b 100644 --- a/module-5/serving/predictor.py +++ b/module-5/serving/predictor.py @@ -12,7 +12,7 @@ logger = logging.getLogger() -MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest" +MODEL_ID = "truskovskiyk/ml-in-production-practice/airflow-pipeline:latest" MODEL_PATH = "/tmp/model" MODEL_LOCK = ".lock-file" @@ -39,7 +39,7 @@ def predict(self, text: List[str]): @classmethod def default_from_model_registry(cls) -> "Predictor": with FileLock(MODEL_LOCK): - if not (Path(MODEL_PATH) / "pytorch_model.bin").exists(): + if not (Path(MODEL_PATH) / "model.safetensors").exists(): load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH) return cls(model_load_path=MODEL_PATH) From 6a9ec151eace74b150a32c95ab296df8a8e123a2 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Fri, 6 Sep 2024 22:26:22 -0400 Subject: [PATCH 02/10] ci --- module-5/Dockerfile | 6 ++---- module-5/requirements.txt | 1 + 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/module-5/Dockerfile b/module-5/Dockerfile index 11ae028..df4673d 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -1,17 +1,15 @@ -FROM huggingface/transformers-pytorch-gpu:4.35.2 as base +# FROM huggingface/transformers-pytorch-gpu:4.35.2 as base +FROM python:3.11 as base WORKDIR /app ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 -RUN pip install pip --upgrade COPY requirements.txt requirements.txt RUN pip install -r requirements.txt -RUN ln -s /usr/bin/python3 /usr/bin/python - ENV PYTHONPATH /app COPY . . 
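For context on what the refactored image above ultimately serves, here is a hedged usage sketch of the `Predictor` class from `module-5/serving/predictor.py` (touched in PATCH 01 of this series). It assumes `WANDB_API_KEY` is exported so the registry artifact can be downloaded on first use; it is a sketch under those assumptions, not part of the patches themselves.

```python
# Sketch: exercises serving/predictor.py as modified in this series.
# Assumes WANDB_API_KEY is set and the W&B artifact
# "truskovskiyk/ml-in-production-practice/airflow-pipeline:latest" is reachable.
from serving.predictor import Predictor

predictor = Predictor.default_from_model_registry()  # downloads the model once, guarded by a file lock
probs = predictor.predict(["one day I will see the world"])
print(probs)  # softmax probabilities, one row per input text
```

The same `Predictor` backs the Streamlit, FastAPI, PyTriton, and KServe targets introduced across this series.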
diff --git a/module-5/requirements.txt b/module-5/requirements.txt index 94155fd..67e57e2 100644 --- a/module-5/requirements.txt +++ b/module-5/requirements.txt @@ -5,6 +5,7 @@ uvicorn==0.21.1 fastapi==0.109.2 wandb==0.17.9 kserve +torch # seldon-core==1.14.1 # # kserve==0.10.1 # # ray==2.0.0 \ No newline at end of file From 542c32c1ca1d94eff98becddf1f62031d8ab3000 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Fri, 6 Sep 2024 22:34:29 -0400 Subject: [PATCH 03/10] ci --- .github/workflows/module-5.yaml | 23 +++++++++++++------ module-5/Dockerfile | 6 ++--- module-5/README.md | 3 ++- module-5/{ => data-samples}/iris-input.json | 0 module-5/{ => data-samples}/kserve-input.json | 0 module-5/{ => data-samples}/samples.json | 0 module-5/k8s/app-streamlit.yaml | 2 +- module-5/serving/predictor.py | 2 +- 8 files changed, 23 insertions(+), 13 deletions(-) rename module-5/{ => data-samples}/iris-input.json (100%) rename module-5/{ => data-samples}/kserve-input.json (100%) rename module-5/{ => data-samples}/samples.json (100%) diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 532509f..47c8d60 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -12,11 +12,11 @@ on: # - 'module-4/**' jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Checkout - uses: actions/checkout@v2 + # build: + # runs-on: ubuntu-latest + # steps: + # - name: Checkout + # uses: actions/checkout@v2 # - name: Login to Docker Hub # uses: docker/login-action@v1 @@ -61,7 +61,7 @@ jobs: # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max - streamlit-docker: + docker-builds: runs-on: ubuntu-latest permissions: contents: read @@ -69,6 +69,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 + - name: Log in to the Container registry uses: docker/login-action@v3 with: @@ -82,10 +83,18 @@ jobs: # with: # images: ghcr.io/kyryl-opens-ml/app-streamlit - - name: Build and push Docker image + - name: Build and push app-streamlit uses: docker/build-push-action@v6 with: context: module-5/ push: true target: app-streamlit tags: ghcr.io/kyryl-opens-ml/app-streamlit:latest + + - name: Build and push app-fastapi + uses: docker/build-push-action@v6 + with: + context: module-5/ + push: true + target: app-fastapi + tags: ghcr.io/kyryl-opens-ml/app-fastapi:latest diff --git a/module-5/Dockerfile b/module-5/Dockerfile index df4673d..02eadf7 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -20,9 +20,9 @@ FROM base AS app-streamlit CMD streamlit run --server.address 0.0.0.0 --server.port 8080 serving/ui_app.py -# # Fast API docker image -# FROM base AS app-fastapi -# CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app +# Fast API docker image +FROM base AS app-fastapi +CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app # FROM base AS app-seldon diff --git a/module-5/README.md b/module-5/README.md index c609e3c..bab145e 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -77,7 +77,7 @@ kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8081:8080 # Test ``` -http POST http://0.0.0.0:8080/predict < samples.json +curl -X POST -H "Content-Type: application/json" -d @data-samples/samples.json http://0.0.0.0:8080/predict ``` ``` @@ -87,6 +87,7 @@ pytest -ss ./tests # Triton Inference Server + ``` docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash diff 
--git a/module-5/iris-input.json b/module-5/data-samples/iris-input.json similarity index 100% rename from module-5/iris-input.json rename to module-5/data-samples/iris-input.json diff --git a/module-5/kserve-input.json b/module-5/data-samples/kserve-input.json similarity index 100% rename from module-5/kserve-input.json rename to module-5/data-samples/kserve-input.json diff --git a/module-5/samples.json b/module-5/data-samples/samples.json similarity index 100% rename from module-5/samples.json rename to module-5/data-samples/samples.json diff --git a/module-5/k8s/app-streamlit.yaml b/module-5/k8s/app-streamlit.yaml index 20899d6..edf4a98 100644 --- a/module-5/k8s/app-streamlit.yaml +++ b/module-5/k8s/app-streamlit.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: app-streamlit - image: kyrylprojector/app-streamlit:latest + image: ghcr.io/kyryl-opens-ml/app-streamlit:latest env: - name: WANDB_API_KEY valueFrom: diff --git a/module-5/serving/predictor.py b/module-5/serving/predictor.py index 670230b..2f478b5 100644 --- a/module-5/serving/predictor.py +++ b/module-5/serving/predictor.py @@ -34,7 +34,7 @@ def __init__(self, model_load_path: str): def predict(self, text: List[str]): text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True) bert_outputs = self.model(**text_encoded).logits - return softmax(bert_outputs).numpy() + return softmax(bert_outputs, dim=-1).numpy() @classmethod def default_from_model_registry(cls) -> "Predictor": From 8d3fe404d61e1c9e830b3906f34e360505481b45 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Fri, 6 Sep 2024 23:42:32 -0400 Subject: [PATCH 04/10] ci --- module-5/Dockerfile | 3 + module-5/Makefile | 20 +-- module-5/requirements.txt | 4 +- module-5/serving/pytriton_client.py | 22 ++++ module-5/serving/pytriton_serving.py | 36 ++++++ .../triton-python-example/add_sub/1/model.py | 110 ---------------- .../triton-python-example/add_sub/client.py | 55 -------- .../add_sub/config.pbtxt | 33 ----- .../nlp-model/1/model.py | 120 ------------------ .../nlp-model/config.pbtxt | 22 ---- 10 files changed, 74 insertions(+), 351 deletions(-) create mode 100644 module-5/serving/pytriton_client.py create mode 100644 module-5/serving/pytriton_serving.py delete mode 100644 module-5/triton-python-example/add_sub/1/model.py delete mode 100644 module-5/triton-python-example/add_sub/client.py delete mode 100644 module-5/triton-python-example/add_sub/config.pbtxt delete mode 100644 module-5/triton-python-example/nlp-model/1/model.py delete mode 100644 module-5/triton-python-example/nlp-model/config.pbtxt diff --git a/module-5/Dockerfile b/module-5/Dockerfile index 02eadf7..ea2a850 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -24,6 +24,9 @@ CMD streamlit run --server.address 0.0.0.0 --server.port 8080 serving/ui_app.py FROM base AS app-fastapi CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app +FROM base AS app-pytriton +CMD python serving/pytriton_serving.py + # FROM base AS app-seldon diff --git a/module-5/Makefile b/module-5/Makefile index fac3443..1645b9d 100644 --- a/module-5/Makefile +++ b/module-5/Makefile @@ -13,22 +13,22 @@ build_fast_api: run_fast_api: build_fast_api docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-fastapi:latest +build_pytriton: + docker build -f Dockerfile -t app-pytriton:latest --target app-pytriton . 
+ +run_pytriton: build_pytriton + docker run -it -p 8001:8001 -p 8000:8000 -p 8002:8002 -e WANDB_API_KEY=${WANDB_API_KEY} app-pytriton:latest + + + + + build_app_seldon: docker build -f Dockerfile -t app-seldon:latest --target app-seldon . run_app_seldon: build_app_seldon docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-seldon:latest -run_dev: build_all - docker run -it --net=host -v $PWD:/dev_data -e WANDB_API_KEY=${WANDB_API_KEY} all:latest /bin/bash - -format: - black --line-length 120 serving tests - isort -rc serving tests - -lint: - flake8 --max-line-length 120 serving tests - build_kserve: docker build -f Dockerfile -t app-kserve:latest --target app-kserve . diff --git a/module-5/requirements.txt b/module-5/requirements.txt index 67e57e2..47d6bf6 100644 --- a/module-5/requirements.txt +++ b/module-5/requirements.txt @@ -5,7 +5,9 @@ uvicorn==0.21.1 fastapi==0.109.2 wandb==0.17.9 kserve -torch +torch==2.4.1 +nvidia_pytriton==0.5.10 +ipython # seldon-core==1.14.1 # # kserve==0.10.1 # # ray==2.0.0 \ No newline at end of file diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py new file mode 100644 index 0000000..2615176 --- /dev/null +++ b/module-5/serving/pytriton_client.py @@ -0,0 +1,22 @@ +import numpy as np +from pytriton.client import ModelClient + +client = ModelClient("localhost", "predictor_a") +print(client.model_config) + + +sequence = np.array([ + ["one day I will see the world"], +]) +sequence = np.char.encode(sequence, "utf-8") + +result_dict = client.infer_batch(text=sequence) + +data = np.array([1, 2, ], dtype=np.float32) +print(client.infer_sample(text="test")) + + +# kill -SIGINT 424 +# Response like a list for an amazing engineers. Don’t add comments or overlap. Keep it concise. + + diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py new file mode 100644 index 0000000..5b489f4 --- /dev/null +++ b/module-5/serving/pytriton_serving.py @@ -0,0 +1,36 @@ +import logging + +import numpy as np + +from pytriton.decorators import batch +from pytriton.model_config import ModelConfig, Tensor +from pytriton.triton import Triton + +from serving.predictor import Predictor + +logger = logging.getLogger("pytriton_serving") +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s: %(message)s") + + +@batch +def _infer_fn(text: np.ndarray): + print(text) + return {"probs": np.array([[0.32, 0.312]])} + + +def main(): + with Triton() as triton: + logger.info("Loading model.") + triton.bind( + model_name="predictor_a", + infer_func=_infer_fn, + inputs=[Tensor(name="text", dtype=object, shape=(-1,))], + outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),], + config=ModelConfig(max_batch_size=4), + ) + logger.info("Serving inference") + triton.serve() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/module-5/triton-python-example/add_sub/1/model.py b/module-5/triton-python-example/add_sub/1/model.py deleted file mode 100644 index 525f447..0000000 --- a/module-5/triton-python-example/add_sub/1/model.py +++ /dev/null @@ -1,110 +0,0 @@ -import json -import triton_python_backend_utils as pb_utils - - -class TritonPythonModel: - """Your Python model must use the same class name. Every Python model - that is created must have "TritonPythonModel" as the class name. - """ - - def initialize(self, args): - """`initialize` is called only once when the model is being loaded. - Implementing `initialize` function is optional. 
This function allows - the model to initialize any state associated with this model. - - Parameters - ---------- - args : dict - Both keys and values are strings. The dictionary keys and values are: - * model_config: A JSON string containing the model configuration - * model_instance_kind: A string containing model instance kind - * model_instance_device_id: A string containing model instance device ID - * model_repository: Model repository path - * model_version: Model version - * model_name: Model name - """ - - # You must parse model_config. JSON string is not parsed here - self.model_config = model_config = json.loads(args["model_config"]) - - # Get OUTPUT0 configuration - output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0") - - # Get OUTPUT1 configuration - output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1") - - # Convert Triton types to numpy types - self.output0_dtype = pb_utils.triton_string_to_numpy( - output0_config["data_type"] - ) - self.output1_dtype = pb_utils.triton_string_to_numpy( - output1_config["data_type"] - ) - - def execute(self, requests): - """`execute` MUST be implemented in every Python model. `execute` - function receives a list of pb_utils.InferenceRequest as the only - argument. This function is called when an inference request is made - for this model. Depending on the batching configuration (e.g. Dynamic - Batching) used, `requests` may contain multiple requests. Every - Python model, must create one pb_utils.InferenceResponse for every - pb_utils.InferenceRequest in `requests`. If there is an error, you can - set the error argument when creating a pb_utils.InferenceResponse - - Parameters - ---------- - requests : list - A list of pb_utils.InferenceRequest - - Returns - ------- - list - A list of pb_utils.InferenceResponse. The length of this list must - be the same as `requests` - """ - - output0_dtype = self.output0_dtype - output1_dtype = self.output1_dtype - - responses = [] - - # Every Python backend must iterate over everyone of the requests - # and create a pb_utils.InferenceResponse for each of them. - for request in requests: - # Get INPUT0 - in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0") - # Get INPUT1 - in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1") - - out_0, out_1 = ( - in_0.as_numpy() + in_1.as_numpy(), - in_0.as_numpy() - in_1.as_numpy(), - ) - - # Create output tensors. You need pb_utils.Tensor - # objects to create pb_utils.InferenceResponse. - out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype)) - out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype)) - - # Create InferenceResponse. You can set an error here in case - # there was a problem with handling this inference request. - # Below is an example of how you can set errors in inference - # response: - # - # pb_utils.InferenceResponse( - # output_tensors=..., TritonError("An error occurred")) - inference_response = pb_utils.InferenceResponse( - output_tensors=[out_tensor_0, out_tensor_1] - ) - responses.append(inference_response) - - # You should return a list of pb_utils.InferenceResponse. Length - # of this list must match the length of `requests` list. - return responses - - def finalize(self): - """`finalize` is called only once when the model is being unloaded. - Implementing `finalize` function is OPTIONAL. This function allows - the model to perform any necessary clean ups before exit. 
- """ - print("Cleaning up...") \ No newline at end of file diff --git a/module-5/triton-python-example/add_sub/client.py b/module-5/triton-python-example/add_sub/client.py deleted file mode 100644 index ea4f8b2..0000000 --- a/module-5/triton-python-example/add_sub/client.py +++ /dev/null @@ -1,55 +0,0 @@ -import sys - -import numpy as np -import tritonclient.http as httpclient -from tritonclient.utils import * - -model_name = "add_sub" -shape = [4] - -with httpclient.InferenceServerClient("localhost:8000") as client: - - input0_data = np.random.rand(*shape).astype(np.float32) - input1_data = np.random.rand(*shape).astype(np.float32) - - - inputs = [ - httpclient.InferInput("INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)), - httpclient.InferInput("INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)), - ] - - inputs[0].set_data_from_numpy(input0_data) - inputs[1].set_data_from_numpy(input1_data) - - outputs = [ - httpclient.InferRequestedOutput("OUTPUT0"), - httpclient.InferRequestedOutput("OUTPUT1"), - ] - - response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs) - - result = response.get_response() - output0_data = response.as_numpy("OUTPUT0") - output1_data = response.as_numpy("OUTPUT1") - - print( - "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format( - input0_data, input1_data, output0_data - ) - ) - print( - "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format( - input0_data, input1_data, output1_data - ) - ) - - if not np.allclose(input0_data + input1_data, output0_data): - print("add_sub example error: incorrect sum") - sys.exit(1) - - if not np.allclose(input0_data - input1_data, output1_data): - print("add_sub example error: incorrect difference") - sys.exit(1) - - print("PASS: add_sub") - sys.exit(0) \ No newline at end of file diff --git a/module-5/triton-python-example/add_sub/config.pbtxt b/module-5/triton-python-example/add_sub/config.pbtxt deleted file mode 100644 index 105ec79..0000000 --- a/module-5/triton-python-example/add_sub/config.pbtxt +++ /dev/null @@ -1,33 +0,0 @@ -name: "add_sub" -backend: "vlmlm" - -input [ - { - name: "INPUT0" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -input [ - { - name: "INPUT1" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -output [ - { - name: "OUTPUT0" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] -output [ - { - name: "OUTPUT1" - data_type: TYPE_FP32 - dims: [ 4 ] - } -] - -instance_group [{ kind: KIND_CPU }] \ No newline at end of file diff --git a/module-5/triton-python-example/nlp-model/1/model.py b/module-5/triton-python-example/nlp-model/1/model.py deleted file mode 100644 index bd7f617..0000000 --- a/module-5/triton-python-example/nlp-model/1/model.py +++ /dev/null @@ -1,120 +0,0 @@ -import os -import albumentations as A -import boto3 -import numpy as np -import torch -from PIL import Image -import requests -from PIL import Image -from io import BytesIO -import requests -import json -from pathlib import Path - - -import json -import triton_python_backend_utils as pb_utils -import torchvision - -import logging -from pathlib import Path -from typing import List - -import pandas as pd -import torch -import wandb -from filelock import FileLock -from torch.nn.functional import softmax -from tqdm import tqdm -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -logger = logging.getLogger() - -MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest" -MODEL_PATH = "/tmp/model" -MODEL_LOCK = ".lock-file" - - -def load_from_registry(model_name: str, 
model_path: Path): - with wandb.init() as run: - artifact = run.use_artifact(model_name, type="model") - artifact_dir = artifact.download(root=model_path) - print(f"{artifact_dir}") - - -class Predictor: - def __init__(self, model_load_path: str): - self.tokenizer = AutoTokenizer.from_pretrained(model_load_path) - self.model = AutoModelForSequenceClassification.from_pretrained(model_load_path) - self.model.eval() - - @torch.no_grad() - def predict(self, text: List[str]): - text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True) - bert_outputs = self.model(**text_encoded).logits - return softmax(bert_outputs).numpy() - - @classmethod - def default_from_model_registry(cls) -> "Predictor": - with FileLock(MODEL_LOCK): - if not (Path(MODEL_PATH) / "pytorch_model.bin").exists(): - load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH) - - return cls(model_load_path=MODEL_PATH) - - def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: - correct_sentence_conf = [] - for idx in tqdm(range(len(df))): - sentence = df.iloc[idx]["sentence"] - conf = self.predict([sentence]).flatten()[1] - correct_sentence_conf.append(conf) - df["correct_sentence_conf"] = correct_sentence_conf - return df - - -class TritonPythonModel: - def initialize(self, args): - self.model_config = model_config = json.loads(args["model_config"]) - - output0_config = pb_utils.get_output_config_by_name(model_config, "pred_boxes") - output1_config = pb_utils.get_output_config_by_name(model_config, "scores") - output2_config = pb_utils.get_output_config_by_name(model_config, "pred_classes") - - self.output0_dtype = pb_utils.triton_string_to_numpy(output0_config["data_type"]) - self.output1_dtype = pb_utils.triton_string_to_numpy(output1_config["data_type"]) - self.output2_dtype = pb_utils.triton_string_to_numpy(output2_config["data_type"]) - - self.Predictor = Predictor.default_from_model_registry() - - def execute(self, requests): - output0_dtype = self.output0_dtype - output1_dtype = self.output1_dtype - output2_dtype = self.output2_dtype - - responses = [] - - # Every Python backend must iterate over everyone of the requests - # and create a pb_utils.InferenceResponse for each of them. - for request in requests: - in_0 = pb_utils.get_input_tensor_by_name(request, "text") - print(in_0.as_numpy()) - url = str(in_0.as_numpy()[0], encoding="utf-8") - print(url, type(url)) - - output = self.damage_segmentation_model.process_image(url=url) - - out_tensor_0 = pb_utils.Tensor("pred_boxes", output["pred_boxes"].astype(output0_dtype)) - out_tensor_1 = pb_utils.Tensor("scores", output["scores"].astype(output1_dtype)) - out_tensor_2 = pb_utils.Tensor("pred_classes", output["pred_classes"].astype(output2_dtype)) - - inference_response = pb_utils.InferenceResponse(output_tensors=[out_tensor_0, out_tensor_1, out_tensor_2]) - responses.append(inference_response) - - return responses - - def finalize(self): - """`finalize` is called only once when the model is being unloaded. - Implementing `finalize` function is optional. This function allows - the model to perform any necessary clean ups before exit. 
- """ - print("Cleaning up...") diff --git a/module-5/triton-python-example/nlp-model/config.pbtxt b/module-5/triton-python-example/nlp-model/config.pbtxt deleted file mode 100644 index 27bb1e1..0000000 --- a/module-5/triton-python-example/nlp-model/config.pbtxt +++ /dev/null @@ -1,22 +0,0 @@ - -name: "nlp-model" -backend: "python" - -input [ - { - name: "text" - data_type: TYPE_STRING - dims: [ 1 ] - - } -] - -output [ - { - name: "pred_boxes" - data_type: TYPE_FP32 - dims: [ 100, 4 ] - } -] - -instance_group [{ kind: KIND_CPU }] From e0962bbd97283744e4decb18931b477973fc03b5 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Fri, 6 Sep 2024 23:55:50 -0400 Subject: [PATCH 05/10] ci --- .github/workflows/module-5.yaml | 9 +++++ module-5/README.md | 8 ++--- module-5/serving/pytriton_client.py | 31 +++++++++--------- module-5/serving/pytriton_serving.py | 49 +++++++++++++++++++++++----- 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 47c8d60..59d8b42 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -98,3 +98,12 @@ jobs: push: true target: app-fastapi tags: ghcr.io/kyryl-opens-ml/app-fastapi:latest + + + - name: Build and push app-pytriton + uses: docker/build-push-action@v6 + with: + context: module-5/ + push: true + target: app-pytriton + tags: ghcr.io/kyryl-opens-ml/app-pytriton:latest \ No newline at end of file diff --git a/module-5/README.md b/module-5/README.md index bab145e..401b3d4 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -87,6 +87,7 @@ pytest -ss ./tests # Triton Inference Server +## PyTriton ``` docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash @@ -99,11 +100,6 @@ tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example ``` -- https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/triton/README.md -- https://github.com/triton-inference-server/fastertransformer_backend -- https://github.com/triton-inference-server/fastertransformer_backend - - @@ -122,7 +118,7 @@ tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example Install ``` -curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.11/hack/quick_install.sh" | bash +curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.13/hack/quick_install.sh" | bash ``` Deploy iris diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py index 2615176..8784be6 100644 --- a/module-5/serving/pytriton_client.py +++ b/module-5/serving/pytriton_client.py @@ -1,22 +1,23 @@ +import logging import numpy as np from pytriton.client import ModelClient -client = ModelClient("localhost", "predictor_a") -print(client.model_config) +def main(): + sequence = np.array([ + ["one day I will see the world"], + ["I would love to learn cook the Asian street food"], + ["Carnival in Rio de Janeiro"], + ["William Shakespeare was a great writer"], + ]) + sequence = np.char.encode(sequence, "utf-8") -sequence = np.array([ - ["one day I will see the world"], -]) -sequence = np.char.encode(sequence, "utf-8") - -result_dict = client.infer_batch(text=sequence) - -data = np.array([1, 2, ], dtype=np.float32) -print(client.infer_sample(text="test")) - - -# kill -SIGINT 424 -# Response like a list for an amazing engineers. Don’t add comments or overlap. Keep it concise. 
+ with ModelClient("0.0.0.0", "BART") as client: + result_dict = client.infer_batch(sequence) + for output_name, output_data in result_dict.items(): + output_data = np.array2string(output_data, threshold=np.inf, max_line_width=np.inf, separator=",").replace("\n", "") + print(f"{output_name}: {output_data}.") +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py index 5b489f4..f962f46 100644 --- a/module-5/serving/pytriton_serving.py +++ b/module-5/serving/pytriton_serving.py @@ -1,31 +1,62 @@ import logging import numpy as np +from transformers import pipeline from pytriton.decorators import batch from pytriton.model_config import ModelConfig, Tensor from pytriton.triton import Triton -from serving.predictor import Predictor -logger = logging.getLogger("pytriton_serving") +logger = logging.getLogger("server") logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s: %(message)s") +CLASSIFIER = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") + +# Labels pre-cached on server side +LABELS = [ + "travel", + "cooking", + "dancing", + "sport", + "music", + "entertainment", + "festival", + "movie", + "literature", +] + @batch -def _infer_fn(text: np.ndarray): - print(text) - return {"probs": np.array([[0.32, 0.312]])} +def _infer_fn(sequence: np.ndarray): + sequence = np.char.decode(sequence.astype("bytes"), "utf-8") + sequence = sequence.tolist() + + logger.info(f"sequence = {sequence}") + + classification_result = CLASSIFIER(sequence, LABELS) + result_labels = [] + for result in classification_result: + logger.debug(result) + most_probable_label = result["labels"][0] + result_labels.append([most_probable_label]) + + return {"label": np.char.encode(result_labels, "utf-8")} def main(): + with Triton() as triton: - logger.info("Loading model.") + logger.info("Loading BART model.") triton.bind( - model_name="predictor_a", + model_name="BART", infer_func=_infer_fn, - inputs=[Tensor(name="text", dtype=object, shape=(-1,))], - outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),], + inputs=[ + Tensor(name="sequence", dtype=bytes, shape=(1,)), + ], + outputs=[ + Tensor(name="label", dtype=bytes, shape=(1,)), + ], config=ModelConfig(max_batch_size=4), ) logger.info("Serving inference") From bdab9c349e04f73d960e6b90f124dd69f5a5d048 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 7 Sep 2024 00:03:56 -0400 Subject: [PATCH 06/10] ci --- module-5/serving/pytriton_client.py | 5 +---- module-5/serving/pytriton_serving.py | 25 +++++++++---------------- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py index 8784be6..c591836 100644 --- a/module-5/serving/pytriton_client.py +++ b/module-5/serving/pytriton_client.py @@ -2,13 +2,10 @@ import numpy as np from pytriton.client import ModelClient - +# https://triton-inference-server.github.io/pytriton/latest/clients/ def main(): sequence = np.array([ ["one day I will see the world"], - ["I would love to learn cook the Asian street food"], - ["Carnival in Rio de Janeiro"], - ["William Shakespeare was a great writer"], ]) sequence = np.char.encode(sequence, "utf-8") diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py index f962f46..f49b62f 100644 --- a/module-5/serving/pytriton_serving.py +++ b/module-5/serving/pytriton_serving.py @@ -7,11 +7,12 @@ from pytriton.model_config import 
ModelConfig, Tensor from pytriton.triton import Triton +from serving.predictor import Predictor logger = logging.getLogger("server") logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s: %(message)s") -CLASSIFIER = pipeline("zero-shot-classification", model="facebook/bart-large-mnli") +predictor = Predictor.default_from_model_registry() # Labels pre-cached on server side LABELS = [ @@ -30,17 +31,13 @@ @batch def _infer_fn(sequence: np.ndarray): sequence = np.char.decode(sequence.astype("bytes"), "utf-8") - sequence = sequence.tolist() + sequence = sequence.tolist()[0] logger.info(f"sequence = {sequence}") + results = predictor.predict(text=sequence) + logger.info(f"results = {results}") - classification_result = CLASSIFIER(sequence, LABELS) - result_labels = [] - for result in classification_result: - logger.debug(result) - most_probable_label = result["labels"][0] - result_labels.append([most_probable_label]) - + result_labels = ['travel' for _ in range(len(sequence))] return {"label": np.char.encode(result_labels, "utf-8")} @@ -51,13 +48,9 @@ def main(): triton.bind( model_name="BART", infer_func=_infer_fn, - inputs=[ - Tensor(name="sequence", dtype=bytes, shape=(1,)), - ], - outputs=[ - Tensor(name="label", dtype=bytes, shape=(1,)), - ], - config=ModelConfig(max_batch_size=4), + inputs=[Tensor(name="sequence", dtype=bytes, shape=(-1,)),], + outputs=[Tensor(name="label", dtype=bytes, shape=(1,)),], + config=ModelConfig(max_batch_size=1), ) logger.info("Serving inference") triton.serve() From 807c3ec076bef9211c451398b9400575364a1581 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 7 Sep 2024 00:21:40 -0400 Subject: [PATCH 07/10] ci --- module-5/.gitignore | 1 + module-5/README.md | 16 ++----------- module-5/serving/pytriton_client.py | 14 ++++-------- module-5/serving/pytriton_serving.py | 34 ++++++++-------------------- 4 files changed, 17 insertions(+), 48 deletions(-) create mode 100644 module-5/.gitignore diff --git a/module-5/.gitignore b/module-5/.gitignore new file mode 100644 index 0000000..a21fd91 --- /dev/null +++ b/module-5/.gitignore @@ -0,0 +1 @@ +.lock-file diff --git a/module-5/README.md b/module-5/README.md index 401b3d4..3500079 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -33,7 +33,7 @@ k9s -A ``` -export WANDB_API_KEY='put your key' +export WANDB_API_KEY='your key here' ``` @@ -86,23 +86,11 @@ pytest -ss ./tests # Triton Inference Server - -## PyTriton - ``` -docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash - -pip install -r /dev_data/requirements.txt -export WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 - -tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example/ - +make run_pytriton ``` - - - # LLMs diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py index c591836..c01524e 100644 --- a/module-5/serving/pytriton_client.py +++ b/module-5/serving/pytriton_client.py @@ -4,16 +4,12 @@ # https://triton-inference-server.github.io/pytriton/latest/clients/ def main(): - sequence = np.array([ - ["one day I will see the world"], - ]) - sequence = np.char.encode(sequence, "utf-8") + text = np.array([["one day I will see the world"],]) + text = np.char.encode(text, "utf-8") - with ModelClient("0.0.0.0", "BART") as client: - result_dict = client.infer_batch(sequence) - for output_name, output_data in result_dict.items(): - output_data = 
np.array2string(output_data, threshold=np.inf, max_line_width=np.inf, separator=",").replace("\n", "") - print(f"{output_name}: {output_data}.") + with ModelClient("0.0.0.0", "predictor_a") as client: + result_dict = client.infer_batch(text=text) + print(result_dict['probs']) if __name__ == "__main__": diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py index f49b62f..4f23a7a 100644 --- a/module-5/serving/pytriton_serving.py +++ b/module-5/serving/pytriton_serving.py @@ -1,7 +1,6 @@ import logging import numpy as np -from transformers import pipeline from pytriton.decorators import batch from pytriton.model_config import ModelConfig, Tensor @@ -14,31 +13,16 @@ predictor = Predictor.default_from_model_registry() -# Labels pre-cached on server side -LABELS = [ - "travel", - "cooking", - "dancing", - "sport", - "music", - "entertainment", - "festival", - "movie", - "literature", -] - @batch -def _infer_fn(sequence: np.ndarray): - sequence = np.char.decode(sequence.astype("bytes"), "utf-8") - sequence = sequence.tolist()[0] +def _infer_fn(text: np.ndarray): + text = np.char.decode(text.astype("bytes"), "utf-8") + text = text.tolist()[0] - logger.info(f"sequence = {sequence}") - results = predictor.predict(text=sequence) + logger.info(f"sequence = {text}") + results = predictor.predict(text=text) logger.info(f"results = {results}") - - result_labels = ['travel' for _ in range(len(sequence))] - return {"label": np.char.encode(result_labels, "utf-8")} + return [results] def main(): @@ -46,10 +30,10 @@ def main(): with Triton() as triton: logger.info("Loading BART model.") triton.bind( - model_name="BART", + model_name="predictor_a", infer_func=_infer_fn, - inputs=[Tensor(name="sequence", dtype=bytes, shape=(-1,)),], - outputs=[Tensor(name="label", dtype=bytes, shape=(1,)),], + inputs=[Tensor(name="text", dtype=bytes, shape=(-1,)),], + outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),], config=ModelConfig(max_batch_size=1), ) logger.info("Serving inference") From 392b7cbf7062d3b6b29a2a93dc0021be6946f48b Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 7 Sep 2024 00:39:22 -0400 Subject: [PATCH 08/10] update --- .github/workflows/module-5.yaml | 67 +++++---------------------------- module-5/Dockerfile | 29 ++------------ module-5/Makefile | 10 ++--- module-5/README.md | 25 +++--------- module-5/k8s/kserve-custom.yaml | 3 +- module-5/requirements.txt | 3 -- module-5/serving/kserve_api.py | 4 +- 7 files changed, 26 insertions(+), 115 deletions(-) diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 59d8b42..9ba2228 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -9,57 +9,9 @@ on: branches: - main # paths: - # - 'module-4/**' + # - 'module-5/**' jobs: - # build: - # runs-on: ubuntu-latest - # steps: - # - name: Checkout - # uses: actions/checkout@v2 - - # - name: Login to Docker Hub - # uses: docker/login-action@v1 - # with: - # username: ${{ secrets.DOCKER_HUB_USERNAME }} - # password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - - # - name: Set up Docker Buildx - # uses: docker/setup-buildx-action@v1 - - # - name: Build app streamlit - # uses: docker/build-push-action@v2 - # with: - # context: week-5/ - # file: week-5/Dockerfile - # push: true - # target: app-streamlit - # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:latest - # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache - # cache-to: type=registry,ref=${{ 
secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache,mode=max - - # - name: Build app fastapi - # uses: docker/build-push-action@v2 - # with: - # context: week-5/ - # file: week-5/Dockerfile - # push: true - # target: app-fastapi - # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:latest - # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache - # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache,mode=max - - # - name: Build app seldon - # uses: docker/build-push-action@v2 - # with: - # context: week-5/ - # file: week-5/Dockerfile - # push: true - # target: app-seldon - # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest - # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache - # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max - docker-builds: runs-on: ubuntu-latest @@ -77,12 +29,6 @@ jobs: username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - # - name: Extract metadata (tags, labels) for Docker - # id: meta - # uses: docker/metadata-action@9ec57ed1fcdbf14dcef7dfbe97b2010124a938b7 - # with: - # images: ghcr.io/kyryl-opens-ml/app-streamlit - - name: Build and push app-streamlit uses: docker/build-push-action@v6 with: @@ -99,11 +45,18 @@ jobs: target: app-fastapi tags: ghcr.io/kyryl-opens-ml/app-fastapi:latest - - name: Build and push app-pytriton uses: docker/build-push-action@v6 with: context: module-5/ push: true target: app-pytriton - tags: ghcr.io/kyryl-opens-ml/app-pytriton:latest \ No newline at end of file + tags: ghcr.io/kyryl-opens-ml/app-pytriton:latest + + - name: Build and push app-kserve + uses: docker/build-push-action@v6 + with: + context: module-5/ + push: true + target: app-kserve + tags: ghcr.io/kyryl-opens-ml/app-kserve:latest \ No newline at end of file diff --git a/module-5/Dockerfile b/module-5/Dockerfile index ea2a850..4843c76 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -28,29 +28,6 @@ FROM base AS app-pytriton CMD python serving/pytriton_serving.py -# FROM base AS app-seldon - -# # Port for GRPC -# EXPOSE 5000 -# # Port for REST -# EXPOSE 9000 - -# # Define environment variables -# ENV MODEL_NAME SeldonAPI -# ENV SERVICE_TYPE MODEL -# # COPY /app/serving/seldon_api.py /app/SeldonAPI.py -# COPY serving/seldon_api.py /app/SeldonAPI.py - -# RUN chown -R 8888 /app -# RUN mkdir /.cache -# RUN chmod 777 /.cache -# RUN mkdir /.config -# RUN chmod 777 /.config - -# CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE - - -# FROM base AS app-kserve -# ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 -# RUN pip install protobuf==3.20.* -# ENTRYPOINT ["python", "serving/kserve_api.py"] +FROM base AS app-kserve +ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 +ENTRYPOINT ["python", "serving/kserve_api.py"] diff --git a/module-5/Makefile b/module-5/Makefile index 1645b9d..5be0e21 100644 --- a/module-5/Makefile +++ b/module-5/Makefile @@ -19,8 +19,11 @@ build_pytriton: run_pytriton: build_pytriton docker run -it -p 8001:8001 -p 8000:8000 -p 8002:8002 -e WANDB_API_KEY=${WANDB_API_KEY} app-pytriton:latest +build_kserve: + docker build -f Dockerfile -t app-kserve:latest --target app-kserve . 
- +run_kserve: build_kserve + docker run -e PORT=8080 -e WANDB_API_KEY=${WANDB_API_KEY} -p 8081:8080 app-kserve:latest build_app_seldon: @@ -29,10 +32,5 @@ build_app_seldon: run_app_seldon: build_app_seldon docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-seldon:latest -build_kserve: - docker build -f Dockerfile -t app-kserve:latest --target app-kserve . - -run_kserve: - docker run -e PORT=8080 -e WANDB_API_KEY=${WANDB_API_KEY} -p 8081:8080 app-kserve:latest diff --git a/module-5/README.md b/module-5/README.md index 3500079..4ca6b88 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -109,14 +109,14 @@ Install curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.13/hack/quick_install.sh" | bash ``` -Deploy iris +## IRIS ``` kubectl create -f k8s/kserve-iris.yaml kubectl get inferenceservices sklearn-iris ``` -Port forward iris +Port forward ``` kubectl get svc --namespace istio-system @@ -126,26 +126,11 @@ kubectl port-forward --namespace istio-system svc/istio-ingressgateway 8080:80 Call API ``` -kubectl get inferenceservice sklearn-iris -SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris -o jsonpath='{.status.url}' | cut -d "/" -f 3) - -export SERVICE_HOSTNAME=sklearn-iris.default.example.com -export INGRESS_HOST=localhost -export INGRESS_PORT=8080 - -curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" "http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/sklearn-iris:predict" -d @./iris-input.json -``` - -Load test - - +curl -v -H "Host: sklearn-iris.default.example.com" -H "Content-Type: application/json" "http://localhost:8080/v1/models/sklearn-iris:predict" -d @data-samples/iris-input.json ``` -kubectl create -f https://raw.githubusercontent.com/kserve/kserve/release-0.11/docs/samples/v1beta1/sklearn/v1/perf.yaml -``` - +## Custom -Custom model - https://kserve.github.io/website/latest/modelserving/v1beta1/custom/custom_model/#build-custom-serving-image-with-buildpacks @@ -154,7 +139,7 @@ docker build -f Dockerfile -t kyrylprojector/custom-model:latest --target app-ks docker push kyrylprojector/custom-model:latest docker run -e PORT=8080 -p 5000:8080 kyrylprojector/custom-model:latest -curl localhost:5000/v1/models/custom-model:predict -d @./kserve-input.json +curl localhost:5000/v1/models/custom-model:predict -d @data-samples/kserve-input.json kubectl create -f k8s/kserve-custom.yaml diff --git a/module-5/k8s/kserve-custom.yaml b/module-5/k8s/kserve-custom.yaml index 1d61226..4177d63 100644 --- a/module-5/k8s/kserve-custom.yaml +++ b/module-5/k8s/kserve-custom.yaml @@ -6,5 +6,4 @@ spec: predictor: containers: - name: kserve-container - image: kyrylprojector/custom-model:latest - + image: ${DOCKER_USER}/custom-model:v1 diff --git a/module-5/requirements.txt b/module-5/requirements.txt index 47d6bf6..6eb0bd5 100644 --- a/module-5/requirements.txt +++ b/module-5/requirements.txt @@ -8,6 +8,3 @@ kserve torch==2.4.1 nvidia_pytriton==0.5.10 ipython -# seldon-core==1.14.1 -# # kserve==0.10.1 -# # ray==2.0.0 \ No newline at end of file diff --git a/module-5/serving/kserve_api.py b/module-5/serving/kserve_api.py index 10984ad..ebdcbcf 100644 --- a/module-5/serving/kserve_api.py +++ b/module-5/serving/kserve_api.py @@ -1,3 +1,4 @@ +import json from serving.predictor import Predictor from typing import Dict from kserve import Model, ModelServer @@ -13,7 +14,8 @@ def load(self): self.ready = True def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict: - instances = payload["instances"] + json_payload = 
json.loads(payload.decode('utf-8')) + instances = json_payload["instances"] predictions = self.predictor.predict(instances) return {"predictions": predictions.tolist()} From 2eac25a566e66db92793375f09b377ef2c978cb7 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 7 Sep 2024 23:55:53 -0400 Subject: [PATCH 09/10] style --- module-5/Dockerfile | 2 -- module-5/k8s/app-fastapi.yaml | 2 +- module-5/serving/fast_api.py | 1 + module-5/serving/flask_api.py | 2 +- module-5/serving/kserve_api.py | 10 ++++++---- module-5/serving/predictor.py | 1 - module-5/serving/pytriton_client.py | 11 ++++++++--- module-5/serving/pytriton_serving.py | 15 +++++++++------ module-5/serving/ui_app.py | 4 ---- 9 files changed, 26 insertions(+), 22 deletions(-) diff --git a/module-5/Dockerfile b/module-5/Dockerfile index 4843c76..f92fe01 100644 --- a/module-5/Dockerfile +++ b/module-5/Dockerfile @@ -1,4 +1,3 @@ -# FROM huggingface/transformers-pytorch-gpu:4.35.2 as base FROM python:3.11 as base WORKDIR /app @@ -29,5 +28,4 @@ CMD python serving/pytriton_serving.py FROM base AS app-kserve -ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66 ENTRYPOINT ["python", "serving/kserve_api.py"] diff --git a/module-5/k8s/app-fastapi.yaml b/module-5/k8s/app-fastapi.yaml index bd706d9..57947f5 100644 --- a/module-5/k8s/app-fastapi.yaml +++ b/module-5/k8s/app-fastapi.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: app-fastapi - image: kyrylprojector/app-fastapi:latest + image: ghcr.io/kyryl-opens-ml/app-fastapi:latest env: - name: WANDB_API_KEY valueFrom: diff --git a/module-5/serving/fast_api.py b/module-5/serving/fast_api.py index 4f784b1..fd97582 100644 --- a/module-5/serving/fast_api.py +++ b/module-5/serving/fast_api.py @@ -22,6 +22,7 @@ class Prediction(BaseModel): def health_check() -> str: return "ok" + @app.post("/predict", response_model=Prediction) def predict(payload: Payload) -> Prediction: prediction = predictor.predict(text=payload.text) diff --git a/module-5/serving/flask_api.py b/module-5/serving/flask_api.py index 1d472e9..9e6281e 100644 --- a/module-5/serving/flask_api.py +++ b/module-5/serving/flask_api.py @@ -7,7 +7,7 @@ @app.route("/predict", methods=["POST"]) def predict(): - payload = request.json['text'] + payload = request.json["text"] result = predictor.predict(payload) return jsonify(result) diff --git a/module-5/serving/kserve_api.py b/module-5/serving/kserve_api.py index ebdcbcf..11e4906 100644 --- a/module-5/serving/kserve_api.py +++ b/module-5/serving/kserve_api.py @@ -3,22 +3,24 @@ from typing import Dict from kserve import Model, ModelServer + class CustomModel(Model): def __init__(self, name: str): - super().__init__(name) - self.name = name - self.load() + super().__init__(name) + self.name = name + self.load() def load(self): self.predictor = Predictor.default_from_model_registry() self.ready = True def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict: - json_payload = json.loads(payload.decode('utf-8')) + json_payload = json.loads(payload.decode("utf-8")) instances = json_payload["instances"] predictions = self.predictor.predict(instances) return {"predictions": predictions.tolist()} + if __name__ == "__main__": model = CustomModel("custom-model") ModelServer().start([model]) diff --git a/module-5/serving/predictor.py b/module-5/serving/predictor.py index 2f478b5..805c1a7 100644 --- a/module-5/serving/predictor.py +++ b/module-5/serving/predictor.py @@ -41,7 +41,6 @@ def default_from_model_registry(cls) -> "Predictor": with FileLock(MODEL_LOCK): if not 
(Path(MODEL_PATH) / "model.safetensors").exists(): load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH) - return cls(model_load_path=MODEL_PATH) def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py index c01524e..ff5a959 100644 --- a/module-5/serving/pytriton_client.py +++ b/module-5/serving/pytriton_client.py @@ -2,15 +2,20 @@ import numpy as np from pytriton.client import ModelClient + # https://triton-inference-server.github.io/pytriton/latest/clients/ def main(): - text = np.array([["one day I will see the world"],]) + text = np.array( + [ + ["one day I will see the world"], + ] + ) text = np.char.encode(text, "utf-8") with ModelClient("0.0.0.0", "predictor_a") as client: result_dict = client.infer_batch(text=text) - print(result_dict['probs']) + print(result_dict["probs"]) if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py index 4f23a7a..490a0b6 100644 --- a/module-5/serving/pytriton_serving.py +++ b/module-5/serving/pytriton_serving.py @@ -26,19 +26,22 @@ def _infer_fn(text: np.ndarray): def main(): - with Triton() as triton: - logger.info("Loading BART model.") + logger.info("Loading models.") triton.bind( model_name="predictor_a", infer_func=_infer_fn, - inputs=[Tensor(name="text", dtype=bytes, shape=(-1,)),], - outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),], - config=ModelConfig(max_batch_size=1), + inputs=[ + Tensor(name="text", dtype=bytes, shape=(-1,)), + ], + outputs=[ + Tensor(name="probs", dtype=np.float32, shape=(-1,)), + ], + config=ModelConfig(max_batch_size=4), ) logger.info("Serving inference") triton.serve() if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/module-5/serving/ui_app.py b/module-5/serving/ui_app.py index 0e2f66c..4c9f525 100644 --- a/module-5/serving/ui_app.py +++ b/module-5/serving/ui_app.py @@ -22,7 +22,6 @@ def single_pred(): def batch_pred(): uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"]) - if uploaded_file: dataframe = pd.read_csv(uploaded_file) st.write("Input dataframe") @@ -34,12 +33,9 @@ def batch_pred(): def main(): st.header("UI serving demo") - tab1, tab2 = st.tabs(["Single prediction", "Batch prediction"]) - with tab1: single_pred() - with tab2: batch_pred() From 5d9cedc01487152cd93a6354658e6a01480f4500 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 7 Sep 2024 23:58:22 -0400 Subject: [PATCH 10/10] ci --- .github/workflows/module-1-advanced.yaml | 2 +- module-4/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/module-1-advanced.yaml b/.github/workflows/module-1-advanced.yaml index 8dcb859..066e2f3 100644 --- a/.github/workflows/module-1-advanced.yaml +++ b/.github/workflows/module-1-advanced.yaml @@ -33,7 +33,7 @@ jobs: - name: Print pods run: | - kubectl wait --for=condition=available --timeout=90s deployment/deployments-app-web + kubectl wait --for=condition=available --timeout=180s deployment/deployments-app-web - name: Print pods run: | diff --git a/module-4/requirements.txt b/module-4/requirements.txt index a00b034..0881b2c 100644 --- a/module-4/requirements.txt +++ b/module-4/requirements.txt @@ -1,3 +1,3 @@ kfp==2.8.0 -apache-airflow==2.9.3 +apache-airflow==2.10.0 apache-airflow-providers-cncf-kubernetes==8.3.3
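A hedged smoke-test sketch against the `/predict` route defined in `module-5/serving/fast_api.py`, assuming the `app-fastapi` container runs with the Makefile's `8081:8080` mapping (`make run_fast_api`) and a valid `WANDB_API_KEY`:

```python
# Hedged smoke test for the FastAPI service from serving/fast_api.py.
# Assumes `make run_fast_api` left the container listening on localhost:8081.
import requests

payload = {"text": ["one day I will see the world"]}
resp = requests.post("http://0.0.0.0:8081/predict", json=payload, timeout=60)
resp.raise_for_status()
print(resp.json()["probs"])  # per-class probabilities for each input text
```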