From 757bb6815cabfb917e2a414824899c2741d4f838 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 14 Sep 2024 00:36:35 -0400 Subject: [PATCH] ci --- .github/workflows/module-5.yaml | 2 +- .github/workflows/module-6.yaml | 55 +++----- module-5/README.md | 46 ------- module-5/k8s/app-triton.yaml | 2 +- module-6/.gitignore | 1 + module-6/README.md | 129 ++++++++++-------- module-6/data/iris-input.json | 6 - module-6/data/text-input.json | 24 ---- ...{fastapi-hpa.yaml => app-fastapi-hpa.yaml} | 0 ...pi-app.yaml => app-fastapi-resources.yaml} | 24 ++-- module-6/k8s/fastapi-locust.yaml | 18 --- module-6/k8s/kafka-infra.yaml | 89 ------------ module-6/k8s/kafka-model-new.yaml | 41 ------ module-6/k8s/kafka-ui-values.yml | 11 -- module-6/k8s/kserve-custom.yaml | 9 -- .../kserve-inferenceserver-autoscaling.yaml | 18 +++ module-6/k8s/kserve-iris.yaml | 10 -- module-6/load-testing/load_test.js | 47 +++++++ module-6/load-testing/perf.yaml | 48 ------- module-6/load-testing/vegeta-job.yaml | 56 ++++++++ module-6/queue/simple_queue.py | 45 ++++++ module-6/serving/fast_api.py | 29 ---- module-6/serving/kserve_api.py | 40 ------ module-6/serving/kserve_transformer.py | 72 ---------- module-6/serving/predictor.py | 55 -------- 25 files changed, 268 insertions(+), 609 deletions(-) create mode 100644 module-6/.gitignore delete mode 100644 module-6/data/iris-input.json delete mode 100644 module-6/data/text-input.json rename module-6/k8s/{fastapi-hpa.yaml => app-fastapi-hpa.yaml} (100%) rename module-6/k8s/{fastapi-app.yaml => app-fastapi-resources.yaml} (58%) delete mode 100644 module-6/k8s/fastapi-locust.yaml delete mode 100644 module-6/k8s/kafka-infra.yaml delete mode 100644 module-6/k8s/kafka-model-new.yaml delete mode 100644 module-6/k8s/kafka-ui-values.yml delete mode 100644 module-6/k8s/kserve-custom.yaml create mode 100644 module-6/k8s/kserve-inferenceserver-autoscaling.yaml delete mode 100644 module-6/k8s/kserve-iris.yaml create mode 100644 module-6/load-testing/load_test.js delete mode 100644 module-6/load-testing/perf.yaml create mode 100644 module-6/load-testing/vegeta-job.yaml create mode 100644 module-6/queue/simple_queue.py delete mode 100644 module-6/serving/fast_api.py delete mode 100644 module-6/serving/kserve_api.py delete mode 100644 module-6/serving/kserve_transformer.py delete mode 100644 module-6/serving/predictor.py diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 9ba2228..767207f 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -59,4 +59,4 @@ jobs: context: module-5/ push: true target: app-kserve - tags: ghcr.io/kyryl-opens-ml/app-kserve:latest \ No newline at end of file + tags: ghcr.io/kyryl-opens-ml/app-kserve:latest \ No newline at end of file diff --git a/.github/workflows/module-6.yaml b/.github/workflows/module-6.yaml index f16c58a..ef81a2b 100644 --- a/.github/workflows/module-6.yaml +++ b/.github/workflows/module-6.yaml @@ -1,42 +1,29 @@ name: Module 6 on: - workflow_dispatch: + push: + branches: + - main + + pull_request: + branches: + - main + # paths: + # - 'module-6/**' jobs: - build: + docker-builds: runs-on: ubuntu-latest + permissions: + contents: read + packages: write steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Login to Docker Hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Build app fastapi - uses: 
docker/build-push-action@v2 + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 with: - context: week-6/ - file: week-6/Dockerfile - push: true - target: app-fastapi - tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:latest - cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache,mode=max - - # - name: Build app seldon - # uses: docker/build-push-action@v2 - # with: - # context: week-5/ - # file: week-5/Dockerfile - # push: true - # target: app-seldon - # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest - # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache - # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/module-5/README.md b/module-5/README.md index 052818e..8a3a5ab 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -34,10 +34,6 @@ k9s -A ``` export WANDB_API_KEY='your key here' -``` - - -``` kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY ``` @@ -119,45 +115,3 @@ Call API ``` curl -v -H "Host: custom-model.default.example.com" -H "Content-Type: application/json" "http://localhost:8080/v1/models/custom-model:predict" -d @data-samples/kserve-input.json ``` - - -# Seldon V2 - -``` -git clone https://github.com/SeldonIO/seldon-core --branch=v2 -``` - - -# Seldon V1 - - -## Install with helm - -``` -kubectl apply -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-crds.yaml -kubectl apply -n ambassador -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-kind.yaml -kubectl wait --timeout=180s -n ambassador --for=condition=deployed ambassadorinstallations/ambassador - -kubectl create namespace seldon-system - -helm install seldon-core seldon-core-operator --version 1.15.1 --repo https://storage.googleapis.com/seldon-charts --set usageMetrics.enabled=true --set ambassador.enabled=true --namespace seldon-system -``` - -## Port forward - -``` -kubectl port-forward --address 0.0.0.0 -n ambassador svc/ambassador 7777:80 -``` - - -## Custom example -``` -kubectl create -f k8s/seldon-custom.yaml - -open http://IP:7777/seldon/default/nlp-sample/api/v1.0/doc/#/ -{ "data": { "ndarray": ["this is an example"] } } - - -curl -X POST "http://IP:7777/seldon/default/nlp-sample/api/v1.0/predictions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"data\":{\"ndarray\":[\"this is an example\"]}}" - -``` \ No newline at end of file diff --git a/module-5/k8s/app-triton.yaml b/module-5/k8s/app-triton.yaml index 40ab962..ba98e42 100644 --- a/module-5/k8s/app-triton.yaml +++ b/module-5/k8s/app-triton.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: app-triton - image: ghcr.io/kyryl-opens-ml/app-triton:latest + image: ghcr.io/kyryl-opens-ml/app-pytriton:latest env: - name: WANDB_API_KEY valueFrom: diff --git a/module-6/.gitignore b/module-6/.gitignore new file mode 100644 index 0000000..b512c09 --- /dev/null +++ b/module-6/.gitignore @@ -0,0 +1 @@ +node_modules \ No newline at end of file diff --git a/module-6/README.md b/module-6/README.md index dcb99a7..04e7a57 100644 --- a/module-6/README.md +++ 
b/module-6/README.md
@@ -14,53 +14,81 @@
 
 
-# Setup
+# Setup
 
-Create kind cluster
+Create kind cluster
 
+```bash
+kind create cluster --name ml-in-production
 ```
-export WANDB_API_KEY="cb86168a2e8db7edb905da69307450f5e7867d66"
-kind create cluster --name ml-in-production-course-week-6
-kubectl create secret generic wandb --from-literal=WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
+
+Run k9s
+
+```bash
+k9s -A
 ```
 
-Run k9s
+
+Create wandb secret
+
 ```
-k9s -A
+export WANDB_API_KEY='your key here'
+kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY
 ```
 
 
-# Load test
+# Benchmarking
+
+NOTE: **Premature optimization is the root of all evil!**
 
-Deploy API
+Deploy API from module 5
+
+```
+kubectl create -f ./k8s/app-fastapi.yaml
+kubectl create -f ./k8s/app-triton.yaml
+kubectl create -f ./k8s/app-streamlit.yaml
+kubectl create -f ./k8s/kserve-inferenceserver.yaml
+```
 
 ```
-kubectl create -f ./k8s/fastapi-app.yaml
 kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8080:8080
+kubectl port-forward --address 0.0.0.0 svc/app-streamlit 8080:8080
 ```
 
-Run test
+Run load test via locust
 
 ```
-locust -f load-testing/locustfile.py --host=http://app-fastapi.default.svc.cluster.local:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
+locust -f load-testing/locustfile.py --host=http://0.0.0.0:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
 ```
 
-Run on k8s
+Run load test via k6
+
+```
+K6_WEB_DASHBOARD=true k6 run ./load-testing/load_test.js
+```
+
+Run on k8s
 
 ```
-kubectl create -f ./k8s/fastapi-locust.yaml
-kubectl port-forward --address 0.0.0.0 pod/load-fastapi-naive 8089:8089
+kubectl create -f ./load-testing/vegeta-job.yaml
 ```
 
 - https://github.com/locustio/locust
 - https://github.com/grafana/k6
 - https://github.com/gatling/gatling
+- https://ghz.sh/
+- https://github.com/tsenart/vegeta
+
 
-# HPA
+# Vertical scaling
 
+- https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
+- https://docs.railway.app/reference/scaling
+
+# Horizontal scaling
+
+- https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
 
 Install metric server
 
@@ -69,6 +97,13 @@ kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/late
 kubectl patch -n kube-system deployment metrics-server --type=json -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
 ```
 
+Update deployment
+
+```
+kubectl apply -f k8s/app-fastapi-resources.yaml
+```
+
+
 Create from cli
 
 ```
@@ -84,65 +119,38 @@ kubectl create -f ./k8s/fastapi-hpa.yaml
 
 
 - https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/
 - https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/
+- https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/
 
-# Async inferece
-
-## Install KServe
-
-Install kserve
+Knative autoscaling: https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/
 
 ```
-curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.10/hack/quick_install.sh" | bash
-```
-
-## Test single model
-
+kubectl create -f ./k8s/kserve-inferenceserver-autoscaling.yaml
 ```
-kubectl create namespace kserve-test
-kubectl create -n kserve-test -f ./k8s/kserve-iris.yaml
-kubectl get inferenceservices sklearn-iris -n kserve-test
-kubectl get svc istio-ingressgateway -n istio-system
-
-kubectl port-forward --address 0.0.0.0 svc/istio-ingressgateway -n istio-system 8080:80
-```
 
 ```
-curl -v -H "Host: sklearn-iris.kserve-test.example.com" "http://0.0.0.0:8080/v1/models/sklearn-iris:predict" -d @data/iris-input.json
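+# Optional: in a second terminal, watch the predictor pods scale out and back in.
+# The label selector below is an assumption based on KServe's default pod labels.
+kubectl get pods -l serving.kserve.io/inferenceservice=custom-model-autoscaling -w
+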
"http://0.0.0.0:8080/v1/models/sklearn-iris:predict" -d @data/iris-input.json +seq 1 1000 | xargs -n1 -P10 -I {} curl -v -H "Host: custom-model-autoscaling.default.example.com" \ +-H "Content-Type: application/json" \ +"http://localhost:8080/v1/models/custom-model:predict" \ +-d @data-samples/kserve-input.json ``` - -``` -kubectl create -f load-testing/perf.yaml -n kserve-test -``` - - -## Test custom model +# Async inferece -Run locally +Simple example ``` -docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve . -docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve . && docker push kyrylprojector/kserve-custom:latest - -docker run -e PORT=8080 -e WANDB_API_KEY=******* -p 8080:8080 kyrylprojector/kserve-custom:latest - - -curl localhost:8080/v1/models/kserve-custom:predict -d @data/text-input.json +modal deploy ./queue/simple_queue.py +python queue/simple_queue.py ``` -Run on k8s -``` -kubectl apply -f k8s/kserve-custom.yaml +Seldon V2 Examples: https://docs.seldon.io/projects/seldon-core/en/v2/contents/architecture/index.html +SQS: https://github.com/poundifdef/smoothmq -kubectl port-forward --namespace istio-system svc/istio-ingressgateway 8080:80 -curl -v -H "Host: custom-model.default.example.com" "http://0.0.0.0:8080/v1/models/kserve-custom:predict" -d @data/text-input.json -``` -- https://kserve.github.io/website/0.10/modelserving/v1beta1/custom/custom_model/#implement-custom-model-using-kserve-api ## Kafka @@ -152,8 +160,12 @@ Install kafka ``` helm repo add bitnami https://charts.bitnami.com/bitnami -helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true --set persistance.enabled=false --version 11.0.0 -helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistance.enabled=false --set logPersistance.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local --version 21.0.0 +helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true \ + --set persistance.enabled=false --version 11.0.0 +helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistance.enabled=false \ + --set logPersistance.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local \ + --version 21.0.0 + ``` Install eventing @@ -224,7 +236,6 @@ mc cp data/text-input.json myminio/input - https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation - https://github.com/huggingface/distil-whisper/ - - https://github.com/intel/neural-compressor - https://github.com/neuralmagic/sparseml diff --git a/module-6/data/iris-input.json b/module-6/data/iris-input.json deleted file mode 100644 index 789813f..0000000 --- a/module-6/data/iris-input.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "instances": [ - [6.8, 2.8, 4.8, 1.4], - [6.0, 3.4, 4.5, 1.6] - ] - } \ No newline at end of file diff --git a/module-6/data/text-input.json b/module-6/data/text-input.json deleted file mode 100644 index c028af6..0000000 --- a/module-6/data/text-input.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "instances": [ - "A rollercoaster of emotions with stunning visuals and remarkable performances. 
A must-see!", - "Despite its high production values, the plot is predictable and lacks originality.", - "An epic space opera that pulls you in with its intricate plot and complex characters.", - "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", - "An extraordinary cinematic experience that beautifully captures the human spirit.", - "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", - "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", - "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", - "A touching story of love and loss, paired with phenomenal acting. It will leave you teary-eyed.", - "The script is clichéd, and the chemistry between the lead actors feels forced.", - "A thrilling and suspenseful journey that keeps you on the edge of your seat till the end.", - "The plot twists feel contrived, and the horror elements seem more comical than scary.", - "A poignant exploration of life and love, combined with a mesmerizing soundtrack.", - "The narrative is overly sentimental and fails to deliver a strong message.", - "An underwater adventure that's both visually stunning and emotionally resonant.", - "The visual effects overshadow the story, which is lacking in depth and originality.", - "An action-packed thrill ride with memorable characters and an engaging plot.", - "The action scenes are overdone and the storyline is paper thin.", - "A captivating sci-fi thriller that challenges your perception of reality.", - "The plot is confusing and the ending leaves too many questions unanswered." - ] - } \ No newline at end of file diff --git a/module-6/k8s/fastapi-hpa.yaml b/module-6/k8s/app-fastapi-hpa.yaml similarity index 100% rename from module-6/k8s/fastapi-hpa.yaml rename to module-6/k8s/app-fastapi-hpa.yaml diff --git a/module-6/k8s/fastapi-app.yaml b/module-6/k8s/app-fastapi-resources.yaml similarity index 58% rename from module-6/k8s/fastapi-app.yaml rename to module-6/k8s/app-fastapi-resources.yaml index a7e67f3..1d9ff20 100644 --- a/module-6/k8s/fastapi-app.yaml +++ b/module-6/k8s/app-fastapi-resources.yaml @@ -4,7 +4,7 @@ kind: Deployment metadata: name: app-fastapi spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: app-fastapi @@ -15,11 +15,16 @@ spec: spec: containers: - name: app-fastapi - image: kyrylprojector/app-fastapi-week-6:latest + image: ghcr.io/kyryl-opens-ml/app-fastapi:latest env: - name: WANDB_API_KEY - value: cb86168a2e8db7edb905da69307450f5e7867d66 + valueFrom: + secretKeyRef: + name: wandb + key: WANDB_API_KEY resources: + limits: + cpu: 500m requests: cpu: 200m --- @@ -35,16 +40,3 @@ spec: protocol: TCP selector: app: app-fastapi ---- -apiVersion: autoscaling/v1 -kind: HorizontalPodAutoscaler -metadata: - name: app-fastapi -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: app-fastapi - minReplicas: 1 - maxReplicas: 10 - targetCPUUtilizationPercentage: 50 diff --git a/module-6/k8s/fastapi-locust.yaml b/module-6/k8s/fastapi-locust.yaml deleted file mode 100644 index fca8509..0000000 --- a/module-6/k8s/fastapi-locust.yaml +++ /dev/null @@ -1,18 +0,0 @@ ---- -apiVersion: v1 -kind: Pod -metadata: - name: load-fastapi-naive -spec: - containers: - - command: - - /bin/sh - - -c - - 'locust -f /app/load-testing/locustfile.py --host=http://app-fastapi.default.svc.cluster.local:8080 --users 100 --spawn-rate 20 --autostart --run-time 600s' - image: kyrylprojector/app-fastapi-week-6:latest - 
imagePullPolicy: Always - name: load - ports: - - containerPort: 8089 - protocol: TCP ---- diff --git a/module-6/k8s/kafka-infra.yaml b/module-6/k8s/kafka-infra.yaml deleted file mode 100644 index c0ae450..0000000 --- a/module-6/k8s/kafka-infra.yaml +++ /dev/null @@ -1,89 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: inferenceservice-addressable-resolver - labels: - contrib.eventing.knative.dev/release: devel - duck.knative.dev/addressable: "true" -# Do not use this role directly. These rules will be added to the "addressable-resolver" role. -rules: - - apiGroups: - - serving.kserve.io - resources: - - inferenceservices - - inferenceservices/status - verbs: - - get - - list - - watch ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: minio - name: minio -spec: - progressDeadlineSeconds: 600 - replicas: 1 - revisionHistoryLimit: 10 - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - metadata: - labels: - app: minio - spec: - containers: - - args: - - server - - /data - env: - - name: MINIO_ACCESS_KEY - value: minio - - name: MINIO_SECRET_KEY - value: minio123 - image: minio/minio:RELEASE.2020-10-18T21-54-12Z - imagePullPolicy: IfNotPresent - name: minio - ports: - - containerPort: 9000 - protocol: TCP ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app: minio - name: minio-service -spec: - ports: - - port: 9000 - protocol: TCP - targetPort: 9000 - selector: - app: minio - type: ClusterIP ---- -apiVersion: v1 -kind: Secret -metadata: - name: mysecret - annotations: - serving.kserve.io/s3-endpoint: minio-service:9000 # replace with your s3 endpoint - serving.kserve.io/s3-usehttps: "0" # by default 1, for testing with minio you need to set to 0 -type: Opaque -data: - AWS_ACCESS_KEY_ID: bWluaW8= - AWS_SECRET_ACCESS_KEY: bWluaW8xMjM= ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: default -secrets: - - name: mysecret - diff --git a/module-6/k8s/kafka-model-new.yaml b/module-6/k8s/kafka-model-new.yaml deleted file mode 100644 index f0f9d15..0000000 --- a/module-6/k8s/kafka-model-new.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - name: kserve-custom -spec: - predictor: - minReplicas: 1 - containers: - - name: kserve-container - image: kyrylprojector/kserve-custom:latest - - transformer: - minReplicas: 1 - containers: - - image: kyrylprojector/kserve-custom-transformer:latest - name: kserve-container - resources: - limits: - cpu: 100m - memory: 1Gi - requests: - cpu: 100m - memory: 1Gi ---- -apiVersion: sources.knative.dev/v1beta1 -kind: KafkaSource -metadata: - name: kafka-source -spec: - consumerGroup: knative-group - bootstrapServers: - - kafka-headless.default.svc.cluster.local:9092 - topics: - - test - sink: - ref: - apiVersion: serving.kserve.io/v1beta1 - kind: InferenceService - name: kserve-custom - uri: /v1/models/kserve-custom:predict - diff --git a/module-6/k8s/kafka-ui-values.yml b/module-6/k8s/kafka-ui-values.yml deleted file mode 100644 index cad1b88..0000000 --- a/module-6/k8s/kafka-ui-values.yml +++ /dev/null @@ -1,11 +0,0 @@ -yamlApplicationConfig: - kafka: - clusters: - - name: yaml - bootstrapServers: kafka-headless.default.svc.cluster.local:9092 - auth: - type: disabled - management: - health: - ldap: - enabled: false diff --git a/module-6/k8s/kserve-custom.yaml b/module-6/k8s/kserve-custom.yaml deleted file mode 100644 index 09e68c0..0000000 --- a/module-6/k8s/kserve-custom.yaml +++ /dev/null @@ 
-1,9 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - name: custom-model -spec: - predictor: - containers: - - name: kserve-container - image: kyrylprojector/kserve-custom:latest \ No newline at end of file diff --git a/module-6/k8s/kserve-inferenceserver-autoscaling.yaml b/module-6/k8s/kserve-inferenceserver-autoscaling.yaml new file mode 100644 index 0000000..aa5fac0 --- /dev/null +++ b/module-6/k8s/kserve-inferenceserver-autoscaling.yaml @@ -0,0 +1,18 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: custom-model-autoscaling +spec: + predictor: + scaleTarget: 1 + scaleMetric: concurrency + containers: + - name: kserve-container + image: ghcr.io/kyryl-opens-ml/app-kserve:latest + imagePullPolicy: Always + env: + - name: WANDB_API_KEY + valueFrom: + secretKeyRef: + name: wandb + key: WANDB_API_KEY \ No newline at end of file diff --git a/module-6/k8s/kserve-iris.yaml b/module-6/k8s/kserve-iris.yaml deleted file mode 100644 index 90bc825..0000000 --- a/module-6/k8s/kserve-iris.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: "serving.kserve.io/v1beta1" -kind: "InferenceService" -metadata: - name: "sklearn-iris" -spec: - predictor: - model: - modelFormat: - name: sklearn - storageUri: "gs://kfserving-examples/models/sklearn/1.0/model" diff --git a/module-6/load-testing/load_test.js b/module-6/load-testing/load_test.js new file mode 100644 index 0000000..9dc5bfa --- /dev/null +++ b/module-6/load-testing/load_test.js @@ -0,0 +1,47 @@ +import http from 'k6/http'; +import { sleep } from 'k6'; + +const movie_reviews = [ + "A rollercoaster of emotions with stunning visuals and remarkable performances. A must-see!", + "Despite its high production values, the plot is predictable and lacks originality.", + "An epic space opera that pulls you in with its intricate plot and complex characters.", + "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", + "An extraordinary cinematic experience that beautifully captures the human spirit.", + "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", + "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", + "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", + "A touching story of love and loss, paired with phenomenal acting. 
It will leave you teary-eyed.", + "The script is clichéd, and the chemistry between the lead actors feels forced.", + "A thrilling and suspenseful journey that keeps you on the edge of your seat till the end.", + "The plot twists feel contrived, and the horror elements seem more comical than scary.", + "A poignant exploration of life and love, combined with a mesmerizing soundtrack.", + "The narrative is overly sentimental and fails to deliver a strong message.", + "An underwater adventure that's both visually stunning and emotionally resonant.", + "The visual effects overshadow the story, which is lacking in depth and originality.", + "An action-packed thrill ride with memorable characters and an engaging plot.", + "The action scenes are overdone and the storyline is paper thin.", + "A captivating sci-fi thriller that challenges your perception of reality.", + "The plot is confusing and the ending leaves too many questions unanswered.", +]; + +export let options = { + vus: 10, + duration: '10m', +}; + +export default function () { + sleep(Math.random() * 4 + 1); + const num_of_review = Math.floor(Math.random() * 100) + 1; + const reviews = []; + for (let i = 0; i < num_of_review; i++) { + const random_index = Math.floor(Math.random() * movie_reviews.length); + reviews.push(movie_reviews[random_index]); + } + const payload = JSON.stringify({ text: reviews }); + const params = { + headers: { + 'Content-Type': 'application/json', + }, + }; + http.post('http://0.0.0.0:8080/predict', payload, params); +} \ No newline at end of file diff --git a/module-6/load-testing/perf.yaml b/module-6/load-testing/perf.yaml deleted file mode 100644 index f5789b0..0000000 --- a/module-6/load-testing/perf.yaml +++ /dev/null @@ -1,48 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - generateName: load-test -spec: - backoffLimit: 6 - parallelism: 1 - template: - metadata: - annotations: - sidecar.istio.io/inject: "false" - spec: - restartPolicy: OnFailure - containers: - - args: - - vegeta -cpus=5 attack -duration=1m -rate=500/1s -targets=/var/vegeta/cfg - | vegeta report -type=text - command: - - sh - - -c - image: peterevans/vegeta:latest - imagePullPolicy: Always - name: vegeta - volumeMounts: - - mountPath: /var/vegeta - name: vegeta-cfg - volumes: - - configMap: - defaultMode: 420 - name: vegeta-cfg - name: vegeta-cfg ---- -apiVersion: v1 -data: - cfg: | - POST http://sklearn-iris.kserve-test.svc.cluster.local/v1/models/sklearn-iris:predict - @/var/vegeta/payload - payload: | - { - "instances": [ - [6.8, 2.8, 4.8, 1.4], - [6.0, 3.4, 4.5, 1.6] - ] - } -kind: ConfigMap -metadata: - annotations: - name: vegeta-cfg diff --git a/module-6/load-testing/vegeta-job.yaml b/module-6/load-testing/vegeta-job.yaml new file mode 100644 index 0000000..d53f845 --- /dev/null +++ b/module-6/load-testing/vegeta-job.yaml @@ -0,0 +1,56 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vegeta-cfg +data: + cfg: | + POST http://app-fastapi.default.svc.cluster.local:8080/predict + Content-Type: application/json + @/var/vegeta/payload + payload: | + { + "text": [ + "A rollercoaster of emotions with stunning visuals and remarkable performances. 
A must-see!", + "Despite its high production values, the plot is predictable and lacks originality.", + "An epic space opera that pulls you in with its intricate plot and complex characters.", + "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", + "An extraordinary cinematic experience that beautifully captures the human spirit.", + "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", + "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", + "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", + "A touching story of love and loss, paired with phenomenal acting. It will leave you teary-eyed.", + "The script is clichéd, and the chemistry between the lead actors feels forced." + ] + } +--- +apiVersion: batch/v1 +kind: Job +metadata: + generateName: load-test- +spec: + backoffLimit: 6 + parallelism: 1 + template: + metadata: + annotations: + sidecar.istio.io/inject: "false" + spec: + restartPolicy: OnFailure + containers: + - name: vegeta + image: peterevans/vegeta:latest + imagePullPolicy: Always + command: + - sh + - -c + args: + - vegeta -cpus=2 attack -duration=1m -rate=100/1s -targets=/var/vegeta/cfg | vegeta report -type=text + volumeMounts: + - name: vegeta-cfg + mountPath: /var/vegeta + volumes: + - name: vegeta-cfg + configMap: + name: vegeta-cfg + defaultMode: 420 \ No newline at end of file diff --git a/module-6/queue/simple_queue.py b/module-6/queue/simple_queue.py new file mode 100644 index 0000000..90f3e7a --- /dev/null +++ b/module-6/queue/simple_queue.py @@ -0,0 +1,45 @@ +from fastapi import FastAPI +from pydantic import BaseModel +import modal + +app = modal.App("simple-queue") + +@app.function() +def process_job(data): + import time + time.sleep(60) + return {"result": data} + +web_app = FastAPI() + +def submit_job(data): + process_job = modal.Function.lookup("simple-queue", "process_job") + call = process_job.spawn(data) + return call.object_id + +def get_job_result(call_id): + function_call = modal.functions.FunctionCall.from_id(call_id) + try: + result = function_call.get(timeout=0) + except modal.exception.OutputExpiredError: + result = {"result": "expired"} + except TimeoutError: + result = {"result": "pending"} + return result + +class SubmitJobRequest(BaseModel): + data: str + +@web_app.post("/submit_job") +async def submit_job_endpoint(request: SubmitJobRequest): + call_id = submit_job(request.data) + return {"call_id": call_id} + +@web_app.get("/get_job_result") +async def get_job_result_endpoint(call_id: str): + result = get_job_result(call_id) + return result + +if __name__ == "__main__": + import uvicorn + uvicorn.run(web_app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/module-6/serving/fast_api.py b/module-6/serving/fast_api.py deleted file mode 100644 index fd97582..0000000 --- a/module-6/serving/fast_api.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import List - -from fastapi import FastAPI -from pydantic import BaseModel - -from serving.predictor import Predictor - - -class Payload(BaseModel): - text: List[str] - - -class Prediction(BaseModel): - probs: List[List[float]] - - -app = FastAPI() -predictor = Predictor.default_from_model_registry() - - -@app.get("/health_check") -def health_check() -> str: - return "ok" - - -@app.post("/predict", response_model=Prediction) -def predict(payload: Payload) -> Prediction: - prediction = predictor.predict(text=payload.text) - return 
Prediction(probs=prediction.tolist()) diff --git a/module-6/serving/kserve_api.py b/module-6/serving/kserve_api.py deleted file mode 100644 index ca388bf..0000000 --- a/module-6/serving/kserve_api.py +++ /dev/null @@ -1,40 +0,0 @@ -import argparse -import logging -from typing import Dict - -import kserve - -from serving.predictor import Predictor - -logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) - - -class CustomModel(kserve.Model): - def __init__(self, name: str): - super().__init__(name) - self.name = name - self.predictor = None - self.load() - - def load(self): - self.predictor = Predictor.default_from_model_registry() - - def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict: - logging.info(f"Received inputs {payload}") - text = payload["instances"] - logging.info(f"Received text {text}, {type(text)}") - result = self.predictor.predict(text=text) - logging.info(f"Resutls {result}, {type(result)}") - return {"predictions": result.tolist()} - - -DEFAULT_MODEL_NAME = "kserve-custom" -parser = argparse.ArgumentParser(parents=[kserve.model_server.parser]) -parser.add_argument("--model_name", default=DEFAULT_MODEL_NAME, help="The name that the model is served under.") - -args, _ = parser.parse_known_args() - -if __name__ == "__main__": - custom_model = CustomModel(args.model_name) - server = kserve.ModelServer() - server.start(models=[custom_model]) diff --git a/module-6/serving/kserve_transformer.py b/module-6/serving/kserve_transformer.py deleted file mode 100644 index b54d8a8..0000000 --- a/module-6/serving/kserve_transformer.py +++ /dev/null @@ -1,72 +0,0 @@ -import argparse -import datetime -import json -import logging -import time -from typing import Dict, Union - -import boto3 -import kserve -from cloudevents.http import CloudEvent -from kserve import InferRequest, InferResponse -from kserve.protocol.grpc.grpc_predict_v2_pb2 import ModelInferResponse - -logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) - -session = boto3.Session() -client = session.client("s3", endpoint_url="http://minio-service:9000", aws_access_key_id="minio", aws_secret_access_key="minio123") -digits_bucket = "output" - - -class ImageTransformer(kserve.Model): - def __init__(self, name: str, predictor_host: str): - super().__init__(name) - self.predictor_host = predictor_host - self._key = None - - def preprocess(self, inputs: Union[Dict, CloudEvent, InferRequest], headers: Dict[str, str] = None) -> Union[Dict, InferRequest]: - - logging.info("Received inputs %s", inputs) - data = json.loads(inputs.get_data().decode("utf-8")) - inputs = data - if inputs["EventName"] == "s3:ObjectCreated:Put": - bucket = inputs["Records"][0]["s3"]["bucket"]["name"] - key = inputs["Records"][0]["s3"]["object"]["key"] - self._key = key - client.download_file(bucket, key, "/tmp/" + key) - - with open("/tmp/" + key, "r") as f: - instances = json.load(f)["instances"] - logging.info(f"instances {instances}") - - return {"instances": instances} - raise Exception("unknown event") - - def postprocess(self, response: Union[Dict, InferResponse, ModelInferResponse], headers: Dict[str, str] = None) -> Union[Dict, ModelInferResponse]: - logging.info( - f"response: {response}", - ) - predictions = response["predictions"] - logging.info(f"predictions: {predictions}") - - upload_path = f"predictions_{time.time()}-{self._key}" - with open(upload_path, "w") as f: - json.dump(predictions, f) - - client.upload_file(upload_path, digits_bucket, upload_path) - logging.info(f"Image {self._key} successfully uploaded 
to {upload_path}") - return response - - -DEFAULT_MODEL_NAME = "custom-model" - -parser = argparse.ArgumentParser(parents=[kserve.model_server.parser]) -parser.add_argument("--model_name", default=DEFAULT_MODEL_NAME, help="The name that the model is served under.") -parser.add_argument("--predictor_host", help="The URL for the model predict function", required=True) - -args, _ = parser.parse_known_args() - -if __name__ == "__main__": - transformer = ImageTransformer(args.model_name, predictor_host=args.predictor_host) - server = kserve.ModelServer() - server.start(models=[transformer]) diff --git a/module-6/serving/predictor.py b/module-6/serving/predictor.py deleted file mode 100644 index 2c5184d..0000000 --- a/module-6/serving/predictor.py +++ /dev/null @@ -1,55 +0,0 @@ -import logging -from pathlib import Path -from typing import List - -import pandas as pd -import torch -import wandb -from filelock import FileLock -from torch.nn.functional import softmax -from tqdm import tqdm -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -logger = logging.getLogger() - -MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest" -MODEL_PATH = "/tmp/model" -MODEL_LOCK = ".lock-file" -WANDB_KEY = "cb86168a2e8db7edb905da69307450f5e7867d66" - -def load_from_registry(model_name: str, model_path: Path): - wandb.login(key=WANDB_KEY) - with wandb.init() as run: - artifact = run.use_artifact(model_name, type="model") - artifact_dir = artifact.download(root=model_path) - print(f"{artifact_dir}") - - -class Predictor: - def __init__(self, model_load_path: str): - self.tokenizer = AutoTokenizer.from_pretrained(model_load_path) - self.model = AutoModelForSequenceClassification.from_pretrained(model_load_path) - self.model.eval() - - @torch.no_grad() - def predict(self, text: List[str]): - text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True) - bert_outputs = self.model(**text_encoded).logits - return softmax(bert_outputs).numpy() - - @classmethod - def default_from_model_registry(cls) -> "Predictor": - with FileLock(MODEL_LOCK): - if not (Path(MODEL_PATH) / "model.safetensors").exists(): - load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH) - - return cls(model_load_path=MODEL_PATH) - - def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: - correct_sentence_conf = [] - for idx in tqdm(range(len(df))): - sentence = df.iloc[idx]["sentence"] - conf = self.predict([sentence]).flatten()[1] - correct_sentence_conf.append(conf) - df["correct_sentence_conf"] = correct_sentence_conf - return df