diff --git a/module-5/.gitignore b/module-5/.gitignore
new file mode 100644
index 0000000..a21fd91
--- /dev/null
+++ b/module-5/.gitignore
@@ -0,0 +1 @@
+.lock-file
diff --git a/module-5/README.md b/module-5/README.md
index 401b3d4..3500079 100644
--- a/module-5/README.md
+++ b/module-5/README.md
@@ -33,7 +33,7 @@ k9s -A
 ```
-export WANDB_API_KEY='put your key'
+export WANDB_API_KEY='your key here'
 ```
@@ -86,23 +86,11 @@ pytest -ss ./tests
 # Triton Inference Server
-
-## PyTriton
-
 ```
-docker run -v $PWD:/dev_data --shm-size=1g --ulimit memlock=-1 --net=host --ulimit stack=67108864 -ti nvcr.io/nvidia/tritonserver:23.11-vllm-python-py3 /bin/bash
-
-pip install -r /dev_data/requirements.txt
-export WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
-
-tritonserver --http-port 5000 --model-repository /dev_data/triton-python-example/
-
+make run_pytriton
 ```
-
-
-
 # LLMs
diff --git a/module-5/serving/pytriton_client.py b/module-5/serving/pytriton_client.py
index c591836..c01524e 100644
--- a/module-5/serving/pytriton_client.py
+++ b/module-5/serving/pytriton_client.py
@@ -4,16 +4,12 @@
 # https://triton-inference-server.github.io/pytriton/latest/clients/
 def main():
-    sequence = np.array([
-        ["one day I will see the world"],
-    ])
-    sequence = np.char.encode(sequence, "utf-8")
+    text = np.array([["one day I will see the world"],])
+    text = np.char.encode(text, "utf-8")
-    with ModelClient("0.0.0.0", "BART") as client:
-        result_dict = client.infer_batch(sequence)
-        for output_name, output_data in result_dict.items():
-            output_data = np.array2string(output_data, threshold=np.inf, max_line_width=np.inf, separator=",").replace("\n", "")
-            print(f"{output_name}: {output_data}.")
+    with ModelClient("0.0.0.0", "predictor_a") as client:
+        result_dict = client.infer_batch(text=text)
+        print(result_dict['probs'])
 if __name__ == "__main__":
diff --git a/module-5/serving/pytriton_serving.py b/module-5/serving/pytriton_serving.py
index f49b62f..4f23a7a 100644
--- a/module-5/serving/pytriton_serving.py
+++ b/module-5/serving/pytriton_serving.py
@@ -1,7 +1,6 @@
 import logging
 import numpy as np
-from transformers import pipeline
 from pytriton.decorators import batch
 from pytriton.model_config import ModelConfig, Tensor
@@ -14,31 +13,16 @@
 predictor = Predictor.default_from_model_registry()
-# Labels pre-cached on server side
-LABELS = [
-    "travel",
-    "cooking",
-    "dancing",
-    "sport",
-    "music",
-    "entertainment",
-    "festival",
-    "movie",
-    "literature",
-]
-
 @batch
-def _infer_fn(sequence: np.ndarray):
-    sequence = np.char.decode(sequence.astype("bytes"), "utf-8")
-    sequence = sequence.tolist()[0]
+def _infer_fn(text: np.ndarray):
+    text = np.char.decode(text.astype("bytes"), "utf-8")
+    text = text.tolist()[0]
-    logger.info(f"sequence = {sequence}")
-    results = predictor.predict(text=sequence)
+    logger.info(f"sequence = {text}")
+    results = predictor.predict(text=text)
     logger.info(f"results = {results}")
-
-    result_labels = ['travel' for _ in range(len(sequence))]
-    return {"label": np.char.encode(result_labels, "utf-8")}
+    return [results]
 def main():
@@ -46,10 +30,10 @@ def main():
     with Triton() as triton:
         logger.info("Loading BART model.")
         triton.bind(
-            model_name="BART",
+            model_name="predictor_a",
             infer_func=_infer_fn,
-            inputs=[Tensor(name="sequence", dtype=bytes, shape=(-1,)),],
-            outputs=[Tensor(name="label", dtype=bytes, shape=(1,)),],
+            inputs=[Tensor(name="text", dtype=bytes, shape=(-1,)),],
+            outputs=[Tensor(name="probs", dtype=np.float32, shape=(-1,)),],
             config=ModelConfig(max_batch_size=1),
         )
         logger.info("Serving inference")
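For reference, the rebound `_infer_fn` above depends on a `Predictor` object pulled from the model registry. The sketch below is only an illustration of the interface the serving code appears to assume (a `default_from_model_registry()` constructor and a `predict(text=...)` call returning a float32 probability array that lines up with the `probs` output tensor); it is not the repository's actual `Predictor` implementation.

```python
# Illustrative sketch only: the interface pytriton_serving.py appears to
# assume from Predictor; the real class lives elsewhere in this repo.
import numpy as np


class Predictor:
    @classmethod
    def default_from_model_registry(cls) -> "Predictor":
        # Assumed to load the registered model weights (e.g. from W&B)
        # and return a ready-to-use predictor instance.
        raise NotImplementedError

    def predict(self, text: list[str]) -> np.ndarray:
        # Assumed to return a float32 array of class probabilities, one row
        # per input string, matching the "probs" output tensor bound above.
        raise NotImplementedError
```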