diff --git a/.github/workflows/module-1-advanced.yaml b/.github/workflows/module-1-advanced.yaml
index 8dcb859..066e2f3 100644
--- a/.github/workflows/module-1-advanced.yaml
+++ b/.github/workflows/module-1-advanced.yaml
@@ -33,7 +33,7 @@ jobs:
 
       - name: Print pods
         run: |
-          kubectl wait --for=condition=available --timeout=90s deployment/deployments-app-web
+          kubectl wait --for=condition=available --timeout=180s deployment/deployments-app-web
 
       - name: Print pods
         run: |
diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml
index 38d01b5..9ba2228 100644
--- a/.github/workflows/module-5.yaml
+++ b/.github/workflows/module-5.yaml
@@ -1,53 +1,62 @@
 name: Module 5
 
 on:
-  workflow_dispatch:
+  push:
+    branches:
+      - main
+
+  pull_request:
+    branches:
+      - main
+    # paths:
+    #   - 'module-5/**'
 
 jobs:
-  build:
+
+  docker-builds:
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      packages: write
     steps:
-      - name: Checkout
-        uses: actions/checkout@v2
-
-      - name: Login to Docker Hub
-        uses: docker/login-action@v1
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Log in to the Container registry
+        uses: docker/login-action@v3
         with:
-          username: ${{ secrets.DOCKER_HUB_USERNAME }}
-          password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
-
-      - name: Set up Docker Buildx
-        uses: docker/setup-buildx-action@v1
-
-      - name: Build app streamlit
-        uses: docker/build-push-action@v2
+          registry: ghcr.io
+          username: ${{ github.actor }}
+          password: ${{ secrets.GITHUB_TOKEN }}
+
+      - name: Build and push app-streamlit
+        uses: docker/build-push-action@v6
         with:
-          context: week-5/
-          file: week-5/Dockerfile
+          context: module-5/
          push: true
          target: app-streamlit
-          tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:latest
-          cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache
-          cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-streamlit:buildcache,mode=max
+          tags: ghcr.io/kyryl-opens-ml/app-streamlit:latest
 
-      - name: Build app fastapi
-        uses: docker/build-push-action@v2
+      - name: Build and push app-fastapi
+        uses: docker/build-push-action@v6
         with:
-          context: week-5/
-          file: week-5/Dockerfile
+          context: module-5/
          push: true
          target: app-fastapi
-          tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:latest
-          cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache
-          cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi:buildcache,mode=max
+          tags: ghcr.io/kyryl-opens-ml/app-fastapi:latest
+
+      - name: Build and push app-pytriton
+        uses: docker/build-push-action@v6
+        with:
+          context: module-5/
+          push: true
+          target: app-pytriton
+          tags: ghcr.io/kyryl-opens-ml/app-pytriton:latest
 
-      - name: Build app seldon
-        uses: docker/build-push-action@v2
+      - name: Build and push app-kserve
+        uses: docker/build-push-action@v6
         with:
-          context: week-5/
-          file: week-5/Dockerfile
+          context: module-5/
          push: true
-          target: app-seldon
-          tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest
-          cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache
-          cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max
+          target: app-kserve
+          tags: ghcr.io/kyryl-opens-ml/app-kserve:latest
\ No newline at end of file
diff --git a/module-4/requirements.txt b/module-4/requirements.txt
index a00b034..0881b2c 100644
--- a/module-4/requirements.txt
+++ b/module-4/requirements.txt
@@ -1,3 +1,3 @@
 kfp==2.8.0
-apache-airflow==2.9.3
+apache-airflow==2.10.0
 apache-airflow-providers-cncf-kubernetes==8.3.3
diff --git a/module-5/.gitignore b/module-5/.gitignore
new file mode 100644
index 0000000..a21fd91
--- /dev/null
+++ b/module-5/.gitignore
@@ -0,0 +1 @@
+.lock-file
diff --git a/module-5/Dockerfile b/module-5/Dockerfile
index d35929c..f92fe01 100644
--- a/module-5/Dockerfile
+++ b/module-5/Dockerfile
@@ -1,18 +1,14 @@
-# FROM huggingface/transformers-pytorch-gpu:4.22.1 as base
-FROM huggingface/transformers-pytorch-gpu:4.35.2 as base
+FROM python:3.11 as base
 
 WORKDIR /app
 
 ENV LC_ALL=C.UTF-8
 ENV LANG=C.UTF-8
 
-RUN pip install pip --upgrade
 COPY requirements.txt requirements.txt
 RUN pip install -r requirements.txt
 
-RUN ln -s /usr/bin/python3 /usr/bin/python
-
 ENV PYTHONPATH /app
 COPY . .
 
@@ -27,30 +23,9 @@ CMD streamlit run --server.address 0.0.0.0 --server.port 8080 serving/ui_app.py
 
 FROM base AS app-fastapi
 CMD uvicorn --host 0.0.0.0 --port 8080 --workers 4 serving.fast_api:app
 
-
-FROM base AS app-seldon
-
-# Port for GRPC
-EXPOSE 5000
-# Port for REST
-EXPOSE 9000
-
-# Define environment variables
-ENV MODEL_NAME SeldonAPI
-ENV SERVICE_TYPE MODEL
-# COPY /app/serving/seldon_api.py /app/SeldonAPI.py
-COPY serving/seldon_api.py /app/SeldonAPI.py
-
-RUN chown -R 8888 /app
-RUN mkdir /.cache
-RUN chmod 777 /.cache
-RUN mkdir /.config
-RUN chmod 777 /.config
-
-CMD exec seldon-core-microservice $MODEL_NAME --service-type $SERVICE_TYPE
+FROM base AS app-pytriton
+CMD python serving/pytriton_serving.py
 
 FROM base AS app-kserve
-ENV WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
-RUN pip install protobuf==3.20.*
 ENTRYPOINT ["python", "serving/kserve_api.py"]
diff --git a/module-5/Makefile b/module-5/Makefile
index d59b0fa..5be0e21 100644
--- a/module-5/Makefile
+++ b/module-5/Makefile
@@ -2,7 +2,7 @@ build_all:
 	docker build -f Dockerfile -t all:latest --target app-seldon .
 
 build_app_streamlit:
-	docker build -f Dockerfile -t app-streamlit:latest --target app-streamlit . 
+	docker build -f Dockerfile -t app-streamlit:latest --target app-streamlit .
 
 run_app_streamlit: build_app_streamlit
 	docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-streamlit:latest
@@ -13,26 +13,24 @@ build_fast_api:
 
 run_fast_api: build_fast_api
 	docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-fastapi:latest
 
-build_app_seldon:
-	docker build -f Dockerfile -t app-seldon:latest --target app-seldon .
-
-run_app_seldon: build_app_seldon
-	docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-seldon:latest
-
-run_dev: build_all
-	docker run -it --net=host -v $PWD:/dev_data -e WANDB_API_KEY=${WANDB_API_KEY} all:latest /bin/bash
+build_pytriton:
+	docker build -f Dockerfile -t app-pytriton:latest --target app-pytriton .
 
-format:
-	black --line-length 120 serving tests
-	isort -rc serving tests
-
-lint:
-	flake8 --max-line-length 120 serving tests
+run_pytriton: build_pytriton
+	docker run -it -p 8001:8001 -p 8000:8000 -p 8002:8002 -e WANDB_API_KEY=${WANDB_API_KEY} app-pytriton:latest
 
 build_kserve:
 	docker build -f Dockerfile -t app-kserve:latest --target app-kserve .
 
-run_kserve:
+run_kserve: build_kserve
 	docker run -e PORT=8080 -e WANDB_API_KEY=${WANDB_API_KEY} -p 8081:8080 app-kserve:latest
+
+build_app_seldon:
+	docker build -f Dockerfile -t app-seldon:latest --target app-seldon .
+
+run_app_seldon: build_app_seldon
+	docker run -it -p 8081:8080 -e WANDB_API_KEY=${WANDB_API_KEY} app-seldon:latest
+
+
+
diff --git a/module-5/PRACTICE.md b/module-5/PRACTICE.md
index 097391d..90d2448 100644
--- a/module-5/PRACTICE.md
+++ b/module-5/PRACTICE.md
@@ -1,11 +1,10 @@
-# Practice 
-
-***
+# Practice
+***
 
 # H9: API serving
 
-## Reading list: 
+## Reading list:
 
 - [CS 329S Lecture 8. Model Deployment](https://docs.google.com/document/d/1hNuW6bqWYZjlwpit_8W1cu7kllb-jTfy3Liof1GJWug/edit#heading=h.kp1fg79091xd)
 - [Machine Learning Systems Design](https://docs.google.com/presentation/d/1U_zKs19VLJKnGE02JDRnzxJ8lgeVF22WSZ_GrA646fY/edit#slide=id.p)
@@ -16,7 +15,6 @@
 - [Gradio Quickstart](https://www.gradio.app/guides/quickstart)
 - [Top 6 Kubernetes Deployment Strategies and How to Choose](https://codefresh.io/learn/kubernetes-deployment/top-6-kubernetes-deployment-strategies-and-how-to-choose/)
 
-
 ## Task:
 
 - PR1: Write a Streamlit UI for serving your model, with tests and CI integration.
@@ -24,14 +22,13 @@
 - PR3: Write a FastAPI server for your model, with tests and CI integration.
 - PR4: Write a Kubernetes deployment YAML (Deployment, Service) for your model's API.
 - PR5: Write a Kubernetes deployment YAML (Deployment, Service) for your model's UI (Streamlit, Gradio).
-- Google doc update with a model serving plan for your ML model. 
+- Google doc update with a model serving plan for your ML model.
 
-## Criteria: 
+## Criteria:
 
-- 5 PRs merged 
+- 5 PRs merged
 - Serving plan in the google doc.
 
-
 # H10: Inference servers
 
 ## Reading list:
@@ -52,17 +49,15 @@
 ## Task:
 
-
 - PR1: Write code for Seldon API deployment of your model, including tests.
 - PR2: Write code for KServe API integration with your model, including tests.
 - PR3: Write code for Triton Inference Server deployment, incorporating tests.
 - PR4: Write code for Ray deployment, complete with tests.
-- PR5: Write code for LLM deployment using TGI, vLLM, and LoRAX.
-- PR6: Write code for LLM deployment with ModalLab.
+- PR5 (optional): Write code for LLM deployment using TGI, vLLM, and LoRAX.
+- PR6 (optional): Write code for LLM deployment with ModalLab.
 - Update the Google document on model serving, outlining options and comparisons between custom servers and inference servers. Decide and explain which solution you will use and why.
 
-
 ## Criteria:
 
-- 6 PRs merged 
-- Serving comparisons and conclusion in the google doc. 
+- 6 PRs merged
+- Serving comparisons and conclusion in the google doc.
\ No newline at end of file
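The H9 tasks above ask for tests next to the FastAPI server; a minimal sketch of such a test is shown below. The file name, the `/predict` route, and the `{"text": [...]}` payload follow `serving/fast_api.py` further down in this diff; the stubbed return value and the patching of the model-registry lookup are assumptions, used only so CI never needs W&B credentials.

```python
# tests/test_fast_api.py -- hypothetical test module, not part of this PR.
from unittest.mock import MagicMock, patch

import numpy as np
from fastapi.testclient import TestClient


def build_client() -> TestClient:
    # Stub the registry lookup so the test never downloads the W&B artifact.
    fake_predictor = MagicMock()
    fake_predictor.predict.return_value = np.array([[0.002, 0.998]])  # adjust to whatever Prediction expects
    with patch("serving.predictor.Predictor.default_from_model_registry", return_value=fake_predictor):
        from serving.fast_api import app  # the predictor is created at import time
    return TestClient(app)


def test_predict_returns_200():
    client = build_client()
    response = client.post("/predict", json={"text": ["this is a test sentence"]})
    assert response.status_code == 200
```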
diff --git a/module-5/README.md b/module-5/README.md
index 481f8ba..4ca6b88 100644
--- a/module-5/README.md
+++ b/module-5/README.md
@@ -2,29 +2,29 @@
 
 ![alt text](./../docs/serving.jpg)
 
-# Practice 
+# Practice
 
 [Practice task](./PRACTICE.md)
 
-*** 
+***
 
 # Reference implementation
 
-*** 
+***
 
-# Setup 
+# Setup
 
-Create kind cluster 
+Create kind cluster
 
-```
-kind create cluster --name ml-in-production-course-week-5
+```bash
+kind create cluster --name ml-in-production
 ```
 
-Run k9s 
+Run k9s
 
-```
+```bash
 k9s -A
 ```
 
@@ -33,7 +33,7 @@ k9s -A
 
 
 ```
-export WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
+export WANDB_API_KEY='your key here'
 ```
 
@@ -77,31 +77,20 @@ kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8081:8080
 # Test
 
 ```
-http POST http://0.0.0.0:8080/predict < samples.json
+curl -X POST -H "Content-Type: application/json" -d @data-samples/samples.json http://0.0.0.0:8080/predict
 ```
 
 ```
 pytest -ss ./tests
 ```
 
-# Triton
-
+# Triton Inference Server
 
 ```
-docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash
-
-pip install -r /dev_data/requirements.txt
-export WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
-
-tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example/
-
+make run_pytriton
 ```
 
-- https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/triton/README.md
-- https://github.com/triton-inference-server/fastertransformer_backend
-- https://github.com/triton-inference-server/fastertransformer_backend
-
 # LLMs
 
@@ -117,17 +106,17 @@ tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example
 Install
 
 ```
-curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.11/hack/quick_install.sh" | bash
+curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.13/hack/quick_install.sh" | bash
 ```
 
-Deploy iris
+## IRIS
 
 ```
 kubectl create -f k8s/kserve-iris.yaml
 kubectl get inferenceservices sklearn-iris
 ```
 
-Port forward iris
+Port forward
 
 ```
 kubectl get svc --namespace istio-system
@@ -137,26 +126,11 @@ kubectl port-forward --namespace istio-system svc/istio-ingressgateway 8080:80
 Call API
 
 ```
-kubectl get inferenceservice sklearn-iris
-SERVICE_HOSTNAME=$(kubectl get inferenceservice sklearn-iris -o jsonpath='{.status.url}' | cut -d "/" -f 3)
-
-export SERVICE_HOSTNAME=sklearn-iris.default.example.com
-export INGRESS_HOST=localhost
-export INGRESS_PORT=8080
-
-curl -v -H "Host: ${SERVICE_HOSTNAME}" -H "Content-Type: application/json" "http://${INGRESS_HOST}:${INGRESS_PORT}/v1/models/sklearn-iris:predict" -d @./iris-input.json
-```
-
-Load test
-
-
-```
-kubectl create -f https://raw.githubusercontent.com/kserve/kserve/release-0.11/docs/samples/v1beta1/sklearn/v1/perf.yaml
-```
-
+curl -v -H "Host: sklearn-iris.default.example.com" -H "Content-Type: application/json" "http://localhost:8080/v1/models/sklearn-iris:predict" -d @data-samples/iris-input.json
 ```
 
 ## Custom
 
-Custom model
 - https://kserve.github.io/website/latest/modelserving/v1beta1/custom/custom_model/#build-custom-serving-image-with-buildpacks
 
 docker build -f Dockerfile -t kyrylprojector/custom-model:latest --target app-ks
 docker push kyrylprojector/custom-model:latest
 docker run -e PORT=8080 -p 5000:8080 kyrylprojector/custom-model:latest
 
-curl localhost:5000/v1/models/custom-model:predict -d @./kserve-input.json
+curl localhost:5000/v1/models/custom-model:predict -d @data-samples/kserve-input.json
 
 kubectl create -f k8s/kserve-custom.yaml
diff --git a/module-5/iris-input.json b/module-5/data-samples/iris-input.json
similarity index 100%
rename from module-5/iris-input.json
rename to module-5/data-samples/iris-input.json
diff --git a/module-5/kserve-input.json b/module-5/data-samples/kserve-input.json
similarity index 100%
rename from module-5/kserve-input.json
rename to module-5/data-samples/kserve-input.json
diff --git a/module-5/samples.json b/module-5/data-samples/samples.json
similarity index 100%
rename from module-5/samples.json
rename to module-5/data-samples/samples.json
diff --git a/module-5/k8s/app-fastapi.yaml b/module-5/k8s/app-fastapi.yaml
index bd706d9..57947f5 100644
--- a/module-5/k8s/app-fastapi.yaml
+++ b/module-5/k8s/app-fastapi.yaml
@@ -15,7 +15,7 @@ spec:
     spec:
       containers:
       - name: app-fastapi
-        image: kyrylprojector/app-fastapi:latest
+        image: ghcr.io/kyryl-opens-ml/app-fastapi:latest
        env:
        - name: WANDB_API_KEY
          valueFrom:
diff --git a/module-5/k8s/app-streamlit.yaml b/module-5/k8s/app-streamlit.yaml
index 20899d6..edf4a98 100644
--- a/module-5/k8s/app-streamlit.yaml
+++ b/module-5/k8s/app-streamlit.yaml
@@ -15,7 +15,7 @@ spec:
     spec:
       containers:
      - name: app-streamlit
-        image: kyrylprojector/app-streamlit:latest
+        image: ghcr.io/kyryl-opens-ml/app-streamlit:latest
        env:
        - name: WANDB_API_KEY
          valueFrom:
diff --git a/module-5/k8s/kserve-custom.yaml b/module-5/k8s/kserve-custom.yaml
index 1d61226..4177d63 100644
--- a/module-5/k8s/kserve-custom.yaml
+++ b/module-5/k8s/kserve-custom.yaml
@@ -6,5 +6,4 @@ spec:
   predictor:
     containers:
       - name: kserve-container
-        image: kyrylprojector/custom-model:latest
-
+        image: ${DOCKER_USER}/custom-model:v1
diff --git a/module-5/requirements.txt b/module-5/requirements.txt
index 23fdb2f..6eb0bd5 100644
--- a/module-5/requirements.txt
+++ b/module-5/requirements.txt
@@ -1,12 +1,10 @@
-gunicorn==22.0.0
-streamlit==1.36.0
-uvicorn==0.24.0.post1
-fastapi==0.111.0
-transformers==4.42.3
-datasets==2.14.6
-typer==0.9.0
-wandb==0.16.1
-kserve
-# seldon-core==1.14.1
-# # kserve==0.10.1
-# # ray==2.0.0
\ No newline at end of file
+transformers==4.44.2
+gunicorn==23.0.0
+streamlit==1.38.0
+uvicorn==0.21.1
+fastapi==0.109.2
+wandb==0.17.9
+kserve
+torch==2.4.1
+nvidia_pytriton==0.5.10
+ipython
diff --git a/module-5/serving/fast_api.py b/module-5/serving/fast_api.py
index 4f784b1..fd97582 100644
--- a/module-5/serving/fast_api.py
+++ b/module-5/serving/fast_api.py
@@ -22,6 +22,7 @@ class Prediction(BaseModel):
 def health_check() -> str:
     return "ok"
 
+
 @app.post("/predict", response_model=Prediction)
 def predict(payload: Payload) -> Prediction:
     prediction = predictor.predict(text=payload.text)
diff --git a/module-5/serving/flask_api.py b/module-5/serving/flask_api.py
index 1d472e9..9e6281e 100644
--- a/module-5/serving/flask_api.py
+++ b/module-5/serving/flask_api.py
@@ -7,7 +7,7 @@
 
 @app.route("/predict", methods=["POST"])
 def predict():
-    payload = request.json['text']
+    payload = request.json["text"]
     result = predictor.predict(payload)
     return jsonify(result)
 
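The FastAPI and Flask handlers above read the same `{"text": [...]}` JSON body that `data-samples/samples.json` carries; a small Python client, equivalent to the curl call in the README, is sketched below. The URL is an assumption: port 8081 matches the `make run_fast_api` mapping and the `kubectl port-forward` shown earlier.

```python
# client_example.py -- hypothetical helper, not part of this PR.
import requests


def predict(sentences: list[str], url: str = "http://localhost:8081/predict") -> dict:
    # Same payload schema the /predict handlers above expect.
    response = requests.post(url, json={"text": sentences}, timeout=30)
    response.raise_for_status()
    return response.json()


if __name__ == "__main__":
    print(predict(["one day I will see the world"]))
```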
diff --git a/module-5/serving/kserve_api.py b/module-5/serving/kserve_api.py
index 10984ad..11e4906 100644
--- a/module-5/serving/kserve_api.py
+++ b/module-5/serving/kserve_api.py
@@ -1,22 +1,26 @@
+import json
 from serving.predictor import Predictor
 from typing import Dict
 from kserve import Model, ModelServer
 
+
 class CustomModel(Model):
     def __init__(self, name: str):
-        super().__init__(name)
-        self.name = name
-        self.load()
+        super().__init__(name)
+        self.name = name
+        self.load()
 
     def load(self):
         self.predictor = Predictor.default_from_model_registry()
         self.ready = True
 
     def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict:
-        instances = payload["instances"]
+        json_payload = json.loads(payload.decode("utf-8"))
+        instances = json_payload["instances"]
         predictions = self.predictor.predict(instances)
         return {"predictions": predictions.tolist()}
 
+
 if __name__ == "__main__":
     model = CustomModel("custom-model")
     ModelServer().start([model])
diff --git a/module-5/serving/predictor.py b/module-5/serving/predictor.py
index e956c50..805c1a7 100644
--- a/module-5/serving/predictor.py
+++ b/module-5/serving/predictor.py
@@ -12,7 +12,7 @@
 
 logger = logging.getLogger()
 
-MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest"
+MODEL_ID = "truskovskiyk/ml-in-production-practice/airflow-pipeline:latest"
 MODEL_PATH = "/tmp/model"
 MODEL_LOCK = ".lock-file"
 
@@ -34,14 +34,13 @@ def __init__(self, model_load_path: str):
     def predict(self, text: List[str]):
         text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True)
         bert_outputs = self.model(**text_encoded).logits
-        return softmax(bert_outputs).numpy()
+        return softmax(bert_outputs, dim=-1).numpy()
 
     @classmethod
     def default_from_model_registry(cls) -> "Predictor":
         with FileLock(MODEL_LOCK):
-            if not (Path(MODEL_PATH) / "pytorch_model.bin").exists():
+            if not (Path(MODEL_PATH) / "model.safetensors").exists():
                 load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH)
-
             return cls(model_load_path=MODEL_PATH)
 
     def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py
new file mode 100644
index 0000000..ff5a959
--- /dev/null
+++ b/module-5/serving/pytriton_client.py
@@ -0,0 +1,21 @@
+import logging
+import numpy as np
+from pytriton.client import ModelClient
+
+
+# https://triton-inference-server.github.io/pytriton/latest/clients/
+def main():
+    text = np.array(
+        [
+            ["one day I will see the world"],
+        ]
+    )
+    text = np.char.encode(text, "utf-8")
+
+    with ModelClient("0.0.0.0", "predictor_a") as client:
+        result_dict = client.infer_batch(text=text)
+        print(result_dict["probs"])
+
+
+if __name__ == "__main__":
+    main()
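`ModelClient` above goes through pytriton's own client library. Because pytriton runs Triton Inference Server underneath, the same model can also be called with plain HTTP using the KServe v2 inference protocol; a sketch follows. The port 8000 assumption matches pytriton's default HTTP endpoint (the one `make run_pytriton` publishes), and the `predictor_a` / `text` / `probs` names mirror the binding in `serving/pytriton_serving.py` below.

```python
# http_client_example.py -- hypothetical, not part of this PR.
import requests


def infer(sentence: str) -> list:
    # One request with a single BYTES element of shape [1, 1], per the v2 protocol.
    body = {
        "inputs": [
            {"name": "text", "shape": [1, 1], "datatype": "BYTES", "data": [sentence]}
        ]
    }
    response = requests.post(
        "http://localhost:8000/v2/models/predictor_a/infer", json=body, timeout=60
    )
    response.raise_for_status()
    # The server returns one output tensor named "probs" with the class probabilities.
    return response.json()["outputs"][0]["data"]


if __name__ == "__main__":
    print(infer("one day I will see the world"))
```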
diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py
new file mode 100644
index 0000000..490a0b6
--- /dev/null
+++ b/module-5/serving/pytriton_serving.py
@@ -0,0 +1,47 @@
+import logging
+
+import numpy as np
+
+from pytriton.decorators import batch
+from pytriton.model_config import ModelConfig, Tensor
+from pytriton.triton import Triton
+
+from serving.predictor import Predictor
+
+logger = logging.getLogger("server")
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(name)s: %(message)s")
+
+predictor = Predictor.default_from_model_registry()
+
+
+@batch
+def _infer_fn(text: np.ndarray):
+    text = np.char.decode(text.astype("bytes"), "utf-8")
+    text = text.tolist()[0]
+
+    logger.info(f"sequence = {text}")
+    results = predictor.predict(text=text)
+    logger.info(f"results = {results}")
+    return [results]
+
+
+def main():
+    with Triton() as triton:
+        logger.info("Loading models.")
+        triton.bind(
+            model_name="predictor_a",
+            infer_func=_infer_fn,
+            inputs=[
+                Tensor(name="text", dtype=bytes, shape=(-1,)),
+            ],
+            outputs=[
+                Tensor(name="probs", dtype=np.float32, shape=(-1,)),
+            ],
+            config=ModelConfig(max_batch_size=4),
+        )
+        logger.info("Serving inference")
+        triton.serve()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/module-5/serving/ui_app.py b/module-5/serving/ui_app.py
index 0e2f66c..4c9f525 100644
--- a/module-5/serving/ui_app.py
+++ b/module-5/serving/ui_app.py
@@ -22,7 +22,6 @@ def single_pred():
 
 def batch_pred():
     uploaded_file = st.file_uploader("Choose a CSV file", type=["csv"])
-
     if uploaded_file:
         dataframe = pd.read_csv(uploaded_file)
         st.write("Input dataframe")
@@ -34,12 +33,9 @@ def batch_pred():
 
 def main():
     st.header("UI serving demo")
-
     tab1, tab2 = st.tabs(["Single prediction", "Batch prediction"])
-
     with tab1:
         single_pred()
-
     with tab2:
         batch_pred()
diff --git a/module-5/triton-python-example/add_sub/1/model.py b/module-5/triton-python-example/add_sub/1/model.py
deleted file mode 100644
index 525f447..0000000
--- a/module-5/triton-python-example/add_sub/1/model.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import json
-import triton_python_backend_utils as pb_utils
-
-
-class TritonPythonModel:
-    """Your Python model must use the same class name. Every Python model
-    that is created must have "TritonPythonModel" as the class name.
-    """
-
-    def initialize(self, args):
-        """`initialize` is called only once when the model is being loaded.
-        Implementing `initialize` function is optional. This function allows
-        the model to initialize any state associated with this model.
-
-        Parameters
-        ----------
-        args : dict
-          Both keys and values are strings. The dictionary keys and values are:
-          * model_config: A JSON string containing the model configuration
-          * model_instance_kind: A string containing model instance kind
-          * model_instance_device_id: A string containing model instance device ID
-          * model_repository: Model repository path
-          * model_version: Model version
-          * model_name: Model name
-        """
-
-        # You must parse model_config. JSON string is not parsed here
-        self.model_config = model_config = json.loads(args["model_config"])
-
-        # Get OUTPUT0 configuration
-        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
-
-        # Get OUTPUT1 configuration
-        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")
-
-        # Convert Triton types to numpy types
-        self.output0_dtype = pb_utils.triton_string_to_numpy(
-            output0_config["data_type"]
-        )
-        self.output1_dtype = pb_utils.triton_string_to_numpy(
-            output1_config["data_type"]
-        )
-
-    def execute(self, requests):
-        """`execute` MUST be implemented in every Python model. `execute`
-        function receives a list of pb_utils.InferenceRequest as the only
-        argument. This function is called when an inference request is made
-        for this model. Depending on the batching configuration (e.g. Dynamic
-        Batching) used, `requests` may contain multiple requests. Every
-        Python model, must create one pb_utils.InferenceResponse for every
-        pb_utils.InferenceRequest in `requests`. If there is an error, you can
-        set the error argument when creating a pb_utils.InferenceResponse
-
-        Parameters
-        ----------
-        requests : list
-          A list of pb_utils.InferenceRequest
-
-        Returns
-        -------
-        list
-          A list of pb_utils.InferenceResponse. The length of this list must
-          be the same as `requests`
-        """
-
-        output0_dtype = self.output0_dtype
-        output1_dtype = self.output1_dtype
-
-        responses = []
-
-        # Every Python backend must iterate over everyone of the requests
-        # and create a pb_utils.InferenceResponse for each of them.
-        for request in requests:
-            # Get INPUT0
-            in_0 = pb_utils.get_input_tensor_by_name(request, "INPUT0")
-            # Get INPUT1
-            in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
-
-            out_0, out_1 = (
-                in_0.as_numpy() + in_1.as_numpy(),
-                in_0.as_numpy() - in_1.as_numpy(),
-            )
-
-            # Create output tensors. You need pb_utils.Tensor
-            # objects to create pb_utils.InferenceResponse.
-            out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
-            out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
-
-            # Create InferenceResponse. You can set an error here in case
-            # there was a problem with handling this inference request.
-            # Below is an example of how you can set errors in inference
-            # response:
-            #
-            # pb_utils.InferenceResponse(
-            #     output_tensors=..., TritonError("An error occurred"))
-            inference_response = pb_utils.InferenceResponse(
-                output_tensors=[out_tensor_0, out_tensor_1]
-            )
-            responses.append(inference_response)
-
-        # You should return a list of pb_utils.InferenceResponse. Length
-        # of this list must match the length of `requests` list.
-        return responses
-
-    def finalize(self):
-        """`finalize` is called only once when the model is being unloaded.
-        Implementing `finalize` function is OPTIONAL. This function allows
-        the model to perform any necessary clean ups before exit.
-        """
-        print("Cleaning up...")
\ No newline at end of file
diff --git a/module-5/triton-python-example/add_sub/client.py b/module-5/triton-python-example/add_sub/client.py
deleted file mode 100644
index ea4f8b2..0000000
--- a/module-5/triton-python-example/add_sub/client.py
+++ /dev/null
@@ -1,55 +0,0 @@
-import sys
-
-import numpy as np
-import tritonclient.http as httpclient
-from tritonclient.utils import *
-
-model_name = "add_sub"
-shape = [4]
-
-with httpclient.InferenceServerClient("localhost:8000") as client:
-
-    input0_data = np.random.rand(*shape).astype(np.float32)
-    input1_data = np.random.rand(*shape).astype(np.float32)
-
-
-    inputs = [
-        httpclient.InferInput("INPUT0", input0_data.shape, np_to_triton_dtype(input0_data.dtype)),
-        httpclient.InferInput("INPUT1", input1_data.shape, np_to_triton_dtype(input1_data.dtype)),
-    ]
-
-    inputs[0].set_data_from_numpy(input0_data)
-    inputs[1].set_data_from_numpy(input1_data)
-
-    outputs = [
-        httpclient.InferRequestedOutput("OUTPUT0"),
-        httpclient.InferRequestedOutput("OUTPUT1"),
-    ]
-
-    response = client.infer(model_name, inputs, request_id=str(1), outputs=outputs)
-
-    result = response.get_response()
-    output0_data = response.as_numpy("OUTPUT0")
-    output1_data = response.as_numpy("OUTPUT1")
-
-    print(
-        "INPUT0 ({}) + INPUT1 ({}) = OUTPUT0 ({})".format(
-            input0_data, input1_data, output0_data
-        )
-    )
-    print(
-        "INPUT0 ({}) - INPUT1 ({}) = OUTPUT1 ({})".format(
-            input0_data, input1_data, output1_data
-        )
-    )
-
-    if not np.allclose(input0_data + input1_data, output0_data):
-        print("add_sub example error: incorrect sum")
-        sys.exit(1)
-
-    if not np.allclose(input0_data - input1_data, output1_data):
-        print("add_sub example error: incorrect difference")
-        sys.exit(1)
-
-    print("PASS: add_sub")
-    sys.exit(0)
\ No newline at end of file
diff --git a/module-5/triton-python-example/add_sub/config.pbtxt b/module-5/triton-python-example/add_sub/config.pbtxt
deleted file mode 100644
index 105ec79..0000000
--- a/module-5/triton-python-example/add_sub/config.pbtxt
+++ /dev/null
@@ -1,33 +0,0 @@
-name: "add_sub"
-backend: "vlmlm"
-
-input [
-  {
-    name: "INPUT0"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-input [
-  {
-    name: "INPUT1"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-output [
-  {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-output [
-  {
-    name: "OUTPUT1"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-
-instance_group [{ kind: KIND_CPU }]
\ No newline at end of file
diff --git a/module-5/triton-python-example/nlp-model/1/model.py b/module-5/triton-python-example/nlp-model/1/model.py
deleted file mode 100644
index bd7f617..0000000
--- a/module-5/triton-python-example/nlp-model/1/model.py
+++ /dev/null
@@ -1,120 +0,0 @@
-import os
-import albumentations as A
-import boto3
-import numpy as np
-import torch
-from PIL import Image
-import requests
-from PIL import Image
-from io import BytesIO
-import requests
-import json
-from pathlib import Path
-
-
-import json
-import triton_python_backend_utils as pb_utils
-import torchvision
-
-import logging
-from pathlib import Path
-from typing import List
-
-import pandas as pd
-import torch
-import wandb
-from filelock import FileLock
-from torch.nn.functional import softmax
-from tqdm import tqdm
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
-logger = logging.getLogger()
-
-MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest"
-MODEL_PATH = "/tmp/model"
-MODEL_LOCK = ".lock-file"
-
-
-def load_from_registry(model_name: str, model_path: Path):
-    with wandb.init() as run:
-        artifact = run.use_artifact(model_name, type="model")
-        artifact_dir = artifact.download(root=model_path)
-        print(f"{artifact_dir}")
-
-
-class Predictor:
-    def __init__(self, model_load_path: str):
-        self.tokenizer = AutoTokenizer.from_pretrained(model_load_path)
-        self.model = AutoModelForSequenceClassification.from_pretrained(model_load_path)
-        self.model.eval()
-
-    @torch.no_grad()
-    def predict(self, text: List[str]):
-        text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True)
-        bert_outputs = self.model(**text_encoded).logits
-        return softmax(bert_outputs).numpy()
-
-    @classmethod
-    def default_from_model_registry(cls) -> "Predictor":
-        with FileLock(MODEL_LOCK):
-            if not (Path(MODEL_PATH) / "pytorch_model.bin").exists():
-                load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH)
-
-        return cls(model_load_path=MODEL_PATH)
-
-    def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
-        correct_sentence_conf = []
-        for idx in tqdm(range(len(df))):
-            sentence = df.iloc[idx]["sentence"]
-            conf = self.predict([sentence]).flatten()[1]
-            correct_sentence_conf.append(conf)
-        df["correct_sentence_conf"] = correct_sentence_conf
-        return df
-
-
-class TritonPythonModel:
-    def initialize(self, args):
-        self.model_config = model_config = json.loads(args["model_config"])
-
-        output0_config = pb_utils.get_output_config_by_name(model_config, "pred_boxes")
-        output1_config = pb_utils.get_output_config_by_name(model_config, "scores")
-        output2_config = pb_utils.get_output_config_by_name(model_config, "pred_classes")
-
-        self.output0_dtype = pb_utils.triton_string_to_numpy(output0_config["data_type"])
-        self.output1_dtype = pb_utils.triton_string_to_numpy(output1_config["data_type"])
-        self.output2_dtype = pb_utils.triton_string_to_numpy(output2_config["data_type"])
-
-        self.Predictor = Predictor.default_from_model_registry()
-
-    def execute(self, requests):
-        output0_dtype = self.output0_dtype
-        output1_dtype = self.output1_dtype
-        output2_dtype = self.output2_dtype
-
-        responses = []
-
-        # Every Python backend must iterate over everyone of the requests
-        # and create a pb_utils.InferenceResponse for each of them.
-        for request in requests:
-            in_0 = pb_utils.get_input_tensor_by_name(request, "text")
-            print(in_0.as_numpy())
-            url = str(in_0.as_numpy()[0], encoding="utf-8")
-            print(url, type(url))
-
-            output = self.damage_segmentation_model.process_image(url=url)
-
-            out_tensor_0 = pb_utils.Tensor("pred_boxes", output["pred_boxes"].astype(output0_dtype))
-            out_tensor_1 = pb_utils.Tensor("scores", output["scores"].astype(output1_dtype))
-            out_tensor_2 = pb_utils.Tensor("pred_classes", output["pred_classes"].astype(output2_dtype))
-
-            inference_response = pb_utils.InferenceResponse(output_tensors=[out_tensor_0, out_tensor_1, out_tensor_2])
-            responses.append(inference_response)
-
-        return responses
-
-    def finalize(self):
-        """`finalize` is called only once when the model is being unloaded.
-        Implementing `finalize` function is optional. This function allows
-        the model to perform any necessary clean ups before exit.
-        """
-        print("Cleaning up...")
diff --git a/module-5/triton-python-example/nlp-model/config.pbtxt b/module-5/triton-python-example/nlp-model/config.pbtxt
deleted file mode 100644
index 27bb1e1..0000000
--- a/module-5/triton-python-example/nlp-model/config.pbtxt
+++ /dev/null
@@ -1,22 +0,0 @@
-
-name: "nlp-model"
-backend: "python"
-
-input [
-  {
-    name: "text"
-    data_type: TYPE_STRING
-    dims: [ 1 ]
-
-  }
-]
-
-output [
-  {
-    name: "pred_boxes"
-    data_type: TYPE_FP32
-    dims: [ 100, 4 ]
-  }
-]
-
-instance_group [{ kind: KIND_CPU }]