From 757bb6815cabfb917e2a414824899c2741d4f838 Mon Sep 17 00:00:00 2001 From: truskovskiyk Date: Sat, 14 Sep 2024 00:36:35 -0400 Subject: [PATCH] ci --- .github/workflows/module-5.yaml | 2 +- .github/workflows/module-6.yaml | 55 +++----- module-5/README.md | 46 ------- module-5/k8s/app-triton.yaml | 2 +- module-6/.gitignore | 1 + module-6/README.md | 129 ++++++++++-------- module-6/data/iris-input.json | 6 - module-6/data/text-input.json | 24 ---- ...{fastapi-hpa.yaml => app-fastapi-hpa.yaml} | 0 ...pi-app.yaml => app-fastapi-resources.yaml} | 24 ++-- module-6/k8s/fastapi-locust.yaml | 18 --- module-6/k8s/kafka-infra.yaml | 89 ------------ module-6/k8s/kafka-model-new.yaml | 41 ------ module-6/k8s/kafka-ui-values.yml | 11 -- module-6/k8s/kserve-custom.yaml | 9 -- .../kserve-inferenceserver-autoscaling.yaml | 18 +++ module-6/k8s/kserve-iris.yaml | 10 -- module-6/load-testing/load_test.js | 47 +++++++ module-6/load-testing/perf.yaml | 48 ------- module-6/load-testing/vegeta-job.yaml | 56 ++++++++ module-6/queue/simple_queue.py | 45 ++++++ module-6/serving/fast_api.py | 29 ---- module-6/serving/kserve_api.py | 40 ------ module-6/serving/kserve_transformer.py | 72 ---------- module-6/serving/predictor.py | 55 -------- 25 files changed, 268 insertions(+), 609 deletions(-) create mode 100644 module-6/.gitignore delete mode 100644 module-6/data/iris-input.json delete mode 100644 module-6/data/text-input.json rename module-6/k8s/{fastapi-hpa.yaml => app-fastapi-hpa.yaml} (100%) rename module-6/k8s/{fastapi-app.yaml => app-fastapi-resources.yaml} (58%) delete mode 100644 module-6/k8s/fastapi-locust.yaml delete mode 100644 module-6/k8s/kafka-infra.yaml delete mode 100644 module-6/k8s/kafka-model-new.yaml delete mode 100644 module-6/k8s/kafka-ui-values.yml delete mode 100644 module-6/k8s/kserve-custom.yaml create mode 100644 module-6/k8s/kserve-inferenceserver-autoscaling.yaml delete mode 100644 module-6/k8s/kserve-iris.yaml create mode 100644 module-6/load-testing/load_test.js delete mode 100644 module-6/load-testing/perf.yaml create mode 100644 module-6/load-testing/vegeta-job.yaml create mode 100644 module-6/queue/simple_queue.py delete mode 100644 module-6/serving/fast_api.py delete mode 100644 module-6/serving/kserve_api.py delete mode 100644 module-6/serving/kserve_transformer.py delete mode 100644 module-6/serving/predictor.py diff --git a/.github/workflows/module-5.yaml b/.github/workflows/module-5.yaml index 9ba2228..767207f 100644 --- a/.github/workflows/module-5.yaml +++ b/.github/workflows/module-5.yaml @@ -59,4 +59,4 @@ jobs: context: module-5/ push: true target: app-kserve - tags: ghcr.io/kyryl-opens-ml/app-kserve:latest \ No newline at end of file + tags: ghcr.io/kyryl-opens-ml/app-kserve:latest \ No newline at end of file diff --git a/.github/workflows/module-6.yaml b/.github/workflows/module-6.yaml index f16c58a..ef81a2b 100644 --- a/.github/workflows/module-6.yaml +++ b/.github/workflows/module-6.yaml @@ -1,42 +1,29 @@ name: Module 6 on: - workflow_dispatch: + push: + branches: + - main + + pull_request: + branches: + - main + # paths: + # - 'module-6/**' jobs: - build: + docker-builds: runs-on: ubuntu-latest + permissions: + contents: read + packages: write steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Login to Docker Hub - uses: docker/login-action@v1 - with: - username: ${{ secrets.DOCKER_HUB_USERNAME }} - password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }} - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v1 - - - name: Build app fastapi - uses: 
docker/build-push-action@v2 + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Log in to the Container registry + uses: docker/login-action@v3 with: - context: week-6/ - file: week-6/Dockerfile - push: true - target: app-fastapi - tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:latest - cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache - cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-fastapi-week-6:buildcache,mode=max - - # - name: Build app seldon - # uses: docker/build-push-action@v2 - # with: - # context: week-5/ - # file: week-5/Dockerfile - # push: true - # target: app-seldon - # tags: ${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:latest - # cache-from: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache - # cache-to: type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/app-seldon:buildcache,mode=max + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} \ No newline at end of file diff --git a/module-5/README.md b/module-5/README.md index 052818e..8a3a5ab 100644 --- a/module-5/README.md +++ b/module-5/README.md @@ -34,10 +34,6 @@ k9s -A ``` export WANDB_API_KEY='your key here' -``` - - -``` kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY ``` @@ -119,45 +115,3 @@ Call API ``` curl -v -H "Host: custom-model.default.example.com" -H "Content-Type: application/json" "http://localhost:8080/v1/models/custom-model:predict" -d @data-samples/kserve-input.json ``` - - -# Seldon V2 - -``` -git clone https://github.com/SeldonIO/seldon-core --branch=v2 -``` - - -# Seldon V1 - - -## Install with helm - -``` -kubectl apply -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-crds.yaml -kubectl apply -n ambassador -f https://github.com/datawire/ambassador-operator/releases/latest/download/ambassador-operator-kind.yaml -kubectl wait --timeout=180s -n ambassador --for=condition=deployed ambassadorinstallations/ambassador - -kubectl create namespace seldon-system - -helm install seldon-core seldon-core-operator --version 1.15.1 --repo https://storage.googleapis.com/seldon-charts --set usageMetrics.enabled=true --set ambassador.enabled=true --namespace seldon-system -``` - -## Port forward - -``` -kubectl port-forward --address 0.0.0.0 -n ambassador svc/ambassador 7777:80 -``` - - -## Custom example -``` -kubectl create -f k8s/seldon-custom.yaml - -open http://IP:7777/seldon/default/nlp-sample/api/v1.0/doc/#/ -{ "data": { "ndarray": ["this is an example"] } } - - -curl -X POST "http://IP:7777/seldon/default/nlp-sample/api/v1.0/predictions" -H "accept: application/json" -H "Content-Type: application/json" -d "{\"data\":{\"ndarray\":[\"this is an example\"]}}" - -``` \ No newline at end of file diff --git a/module-5/k8s/app-triton.yaml b/module-5/k8s/app-triton.yaml index 40ab962..ba98e42 100644 --- a/module-5/k8s/app-triton.yaml +++ b/module-5/k8s/app-triton.yaml @@ -15,7 +15,7 @@ spec: spec: containers: - name: app-triton - image: ghcr.io/kyryl-opens-ml/app-triton:latest + image: ghcr.io/kyryl-opens-ml/app-pytriton:latest env: - name: WANDB_API_KEY valueFrom: diff --git a/module-6/.gitignore b/module-6/.gitignore new file mode 100644 index 0000000..b512c09 --- /dev/null +++ b/module-6/.gitignore @@ -0,0 +1 @@ +node_modules \ No newline at end of file diff --git a/module-6/README.md b/module-6/README.md index dcb99a7..04e7a57 100644 --- a/module-6/README.md +++ 
b/module-6/README.md
@@ -14,53 +14,81 @@
 
 
-# Setup
+# Setup
 
-Create kind cluster
+Create kind cluster
 
+```bash
+kind create cluster --name ml-in-production
 ```
-export WANDB_API_KEY="cb86168a2e8db7edb905da69307450f5e7867d66"
-kind create cluster --name ml-in-production-course-week-6
-kubectl create secret generic wandb --from-literal=WANDB_API_KEY=cb86168a2e8db7edb905da69307450f5e7867d66
+
+Run k9s
+
+```bash
+k9s -A
 ```
 
-Run k9s
+
+Create wandb secret
+
 ```
-k9s -A
+export WANDB_API_KEY='your key here'
+kubectl create secret generic wandb --from-literal=WANDB_API_KEY=$WANDB_API_KEY
 ```
 
 
-# Load test
+# Benchmarking
+
+NOTE: **Premature optimization is the root of all evil!**
 
-Deploy API
+Deploy API from module 5
+
+```
+kubectl create -f ./k8s/app-fastapi.yaml
+kubectl create -f ./k8s/app-triton.yaml
+kubectl create -f ./k8s/app-streamlit.yaml
+kubectl create -f ./k8s/kserve-inferenceserver.yaml
+```
 
 ```
-kubectl create -f ./k8s/fastapi-app.yaml
 kubectl port-forward --address 0.0.0.0 svc/app-fastapi 8080:8080
+kubectl port-forward --address 0.0.0.0 svc/app-streamlit 8080:8080
 ```
 
-Run test
+Run load test via locust
 
 ```
-locust -f load-testing/locustfile.py --host=http://app-fastapi.default.svc.cluster.local:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
+locust -f load-testing/locustfile.py --host=http://0.0.0.0:8080 --users 50 --spawn-rate 10 --autostart --run-time 600s
 ```
 
-Run on k8s
+Run load test via k6
+
+```
+K6_WEB_DASHBOARD=true k6 run ./load-testing/load_test.js
+```
+
+Run on k8s
 
 ```
-kubectl create -f ./k8s/fastapi-locust.yaml
-kubectl port-forward --address 0.0.0.0 pod/load-fastapi-naive 8089:8089
+kubectl create -f ./load-testing/vegeta-job.yaml
 ```
 
 - https://github.com/locustio/locust
 - https://github.com/grafana/k6
 - https://github.com/gatling/gatling
+- https://ghz.sh/
+- https://github.com/tsenart/vegeta
+
 
-# HPA
+# Vertical scaling
 
+- https://github.com/kubernetes/autoscaler/tree/master/vertical-pod-autoscaler
+- https://docs.railway.app/reference/scaling
+
+# Horizontal scaling
+
+- https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/
 
 Install metric server
 
@@ -69,6 +97,13 @@ kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/late
 kubectl patch -n kube-system deployment metrics-server --type=json -p '[{"op":"add","path":"/spec/template/spec/containers/0/args/-","value":"--kubelet-insecure-tls"}]'
 ```
 
+Update deployment
+
+```
+kubectl apply -f k8s/app-fastapi-resources.yaml
+```
+
+
 Create from cli
 
 ```
@@ -84,65 +119,38 @@ kubectl create -f ./k8s/fastapi-hpa.yaml
 
 
 - https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/
 - https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale-walkthrough/
+- https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/
 
-# Async inferece
-
-## Install KServe
-
-Install kserve
+Knative autoscaling: https://kserve.github.io/website/master/modelserving/autoscaling/autoscaling/
 
 ```
-curl -s "https://raw.githubusercontent.com/kserve/kserve/release-0.10/hack/quick_install.sh" | bash
-```
-
-## Test single model
-
+kubectl create -f ./k8s/kserve-inferenceserver-autoscaling.yaml
 ```
-kubectl create namespace kserve-test
-kubectl create -n kserve-test -f ./k8s/kserve-iris.yaml
-kubectl get inferenceservices sklearn-iris -n kserve-test
-kubectl get svc istio-ingressgateway -n istio-system
-
-kubectl port-forward --address 0.0.0.0 svc/istio-ingressgateway -n istio-system 8080:80
-```
 
 ```
-curl -v -H "Host: sklearn-iris.kserve-test.example.com" "http://0.0.0.0:8080/v1/models/sklearn-iris:predict" -d @data/iris-input.json
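+# Optional: in a second terminal, watch the predictor pods scale out and back in.
+# The label selector below is an assumption based on KServe's default pod labels.
+kubectl get pods -l serving.kserve.io/inferenceservice=custom-model-autoscaling -w
+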
"http://0.0.0.0:8080/v1/models/sklearn-iris:predict" -d @data/iris-input.json +seq 1 1000 | xargs -n1 -P10 -I {} curl -v -H "Host: custom-model-autoscaling.default.example.com" \ +-H "Content-Type: application/json" \ +"http://localhost:8080/v1/models/custom-model:predict" \ +-d @data-samples/kserve-input.json ``` - -``` -kubectl create -f load-testing/perf.yaml -n kserve-test -``` - - -## Test custom model +# Async inferece -Run locally +Simple example ``` -docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve . -docker build -t kyrylprojector/kserve-custom:latest -f Dockerfile --target app-kserve . && docker push kyrylprojector/kserve-custom:latest - -docker run -e PORT=8080 -e WANDB_API_KEY=******* -p 8080:8080 kyrylprojector/kserve-custom:latest - - -curl localhost:8080/v1/models/kserve-custom:predict -d @data/text-input.json +modal deploy ./queue/simple_queue.py +python queue/simple_queue.py ``` -Run on k8s -``` -kubectl apply -f k8s/kserve-custom.yaml +Seldon V2 Examples: https://docs.seldon.io/projects/seldon-core/en/v2/contents/architecture/index.html +SQS: https://github.com/poundifdef/smoothmq -kubectl port-forward --namespace istio-system svc/istio-ingressgateway 8080:80 -curl -v -H "Host: custom-model.default.example.com" "http://0.0.0.0:8080/v1/models/kserve-custom:predict" -d @data/text-input.json -``` -- https://kserve.github.io/website/0.10/modelserving/v1beta1/custom/custom_model/#implement-custom-model-using-kserve-api ## Kafka @@ -152,8 +160,12 @@ Install kafka ``` helm repo add bitnami https://charts.bitnami.com/bitnami -helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true --set persistance.enabled=false --version 11.0.0 -helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistance.enabled=false --set logPersistance.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local --version 21.0.0 +helm install zookeeper bitnami/zookeeper --set replicaCount=1 --set auth.enabled=false --set allowAnonymousLogin=true \ + --set persistance.enabled=false --version 11.0.0 +helm install kafka bitnami/kafka --set zookeeper.enabled=false --set replicaCount=1 --set persistance.enabled=false \ + --set logPersistance.enabled=false --set externalZookeeper.servers=zookeeper-headless.default.svc.cluster.local \ + --version 21.0.0 + ``` Install eventing @@ -224,7 +236,6 @@ mc cp data/text-input.json myminio/input - https://github.com/huggingface/transformers/tree/main/examples/research_projects/distillation - https://github.com/huggingface/distil-whisper/ - - https://github.com/intel/neural-compressor - https://github.com/neuralmagic/sparseml diff --git a/module-6/data/iris-input.json b/module-6/data/iris-input.json deleted file mode 100644 index 789813f..0000000 --- a/module-6/data/iris-input.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "instances": [ - [6.8, 2.8, 4.8, 1.4], - [6.0, 3.4, 4.5, 1.6] - ] - } \ No newline at end of file diff --git a/module-6/data/text-input.json b/module-6/data/text-input.json deleted file mode 100644 index c028af6..0000000 --- a/module-6/data/text-input.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "instances": [ - "A rollercoaster of emotions with stunning visuals and remarkable performances. 
A must-see!", - "Despite its high production values, the plot is predictable and lacks originality.", - "An epic space opera that pulls you in with its intricate plot and complex characters.", - "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", - "An extraordinary cinematic experience that beautifully captures the human spirit.", - "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", - "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", - "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", - "A touching story of love and loss, paired with phenomenal acting. It will leave you teary-eyed.", - "The script is clichéd, and the chemistry between the lead actors feels forced.", - "A thrilling and suspenseful journey that keeps you on the edge of your seat till the end.", - "The plot twists feel contrived, and the horror elements seem more comical than scary.", - "A poignant exploration of life and love, combined with a mesmerizing soundtrack.", - "The narrative is overly sentimental and fails to deliver a strong message.", - "An underwater adventure that's both visually stunning and emotionally resonant.", - "The visual effects overshadow the story, which is lacking in depth and originality.", - "An action-packed thrill ride with memorable characters and an engaging plot.", - "The action scenes are overdone and the storyline is paper thin.", - "A captivating sci-fi thriller that challenges your perception of reality.", - "The plot is confusing and the ending leaves too many questions unanswered." - ] - } \ No newline at end of file diff --git a/module-6/k8s/fastapi-hpa.yaml b/module-6/k8s/app-fastapi-hpa.yaml similarity index 100% rename from module-6/k8s/fastapi-hpa.yaml rename to module-6/k8s/app-fastapi-hpa.yaml diff --git a/module-6/k8s/fastapi-app.yaml b/module-6/k8s/app-fastapi-resources.yaml similarity index 58% rename from module-6/k8s/fastapi-app.yaml rename to module-6/k8s/app-fastapi-resources.yaml index a7e67f3..1d9ff20 100644 --- a/module-6/k8s/fastapi-app.yaml +++ b/module-6/k8s/app-fastapi-resources.yaml @@ -4,7 +4,7 @@ kind: Deployment metadata: name: app-fastapi spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: app-fastapi @@ -15,11 +15,16 @@ spec: spec: containers: - name: app-fastapi - image: kyrylprojector/app-fastapi-week-6:latest + image: ghcr.io/kyryl-opens-ml/app-fastapi:latest env: - name: WANDB_API_KEY - value: cb86168a2e8db7edb905da69307450f5e7867d66 + valueFrom: + secretKeyRef: + name: wandb + key: WANDB_API_KEY resources: + limits: + cpu: 500m requests: cpu: 200m --- @@ -35,16 +40,3 @@ spec: protocol: TCP selector: app: app-fastapi ---- -apiVersion: autoscaling/v1 -kind: HorizontalPodAutoscaler -metadata: - name: app-fastapi -spec: - scaleTargetRef: - apiVersion: apps/v1 - kind: Deployment - name: app-fastapi - minReplicas: 1 - maxReplicas: 10 - targetCPUUtilizationPercentage: 50 diff --git a/module-6/k8s/fastapi-locust.yaml b/module-6/k8s/fastapi-locust.yaml deleted file mode 100644 index fca8509..0000000 --- a/module-6/k8s/fastapi-locust.yaml +++ /dev/null @@ -1,18 +0,0 @@ ---- -apiVersion: v1 -kind: Pod -metadata: - name: load-fastapi-naive -spec: - containers: - - command: - - /bin/sh - - -c - - 'locust -f /app/load-testing/locustfile.py --host=http://app-fastapi.default.svc.cluster.local:8080 --users 100 --spawn-rate 20 --autostart --run-time 600s' - image: kyrylprojector/app-fastapi-week-6:latest - 
imagePullPolicy: Always - name: load - ports: - - containerPort: 8089 - protocol: TCP ---- diff --git a/module-6/k8s/kafka-infra.yaml b/module-6/k8s/kafka-infra.yaml deleted file mode 100644 index c0ae450..0000000 --- a/module-6/k8s/kafka-infra.yaml +++ /dev/null @@ -1,89 +0,0 @@ -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: inferenceservice-addressable-resolver - labels: - contrib.eventing.knative.dev/release: devel - duck.knative.dev/addressable: "true" -# Do not use this role directly. These rules will be added to the "addressable-resolver" role. -rules: - - apiGroups: - - serving.kserve.io - resources: - - inferenceservices - - inferenceservices/status - verbs: - - get - - list - - watch ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: minio - name: minio -spec: - progressDeadlineSeconds: 600 - replicas: 1 - revisionHistoryLimit: 10 - selector: - matchLabels: - app: minio - strategy: - type: Recreate - template: - metadata: - labels: - app: minio - spec: - containers: - - args: - - server - - /data - env: - - name: MINIO_ACCESS_KEY - value: minio - - name: MINIO_SECRET_KEY - value: minio123 - image: minio/minio:RELEASE.2020-10-18T21-54-12Z - imagePullPolicy: IfNotPresent - name: minio - ports: - - containerPort: 9000 - protocol: TCP ---- -apiVersion: v1 -kind: Service -metadata: - labels: - app: minio - name: minio-service -spec: - ports: - - port: 9000 - protocol: TCP - targetPort: 9000 - selector: - app: minio - type: ClusterIP ---- -apiVersion: v1 -kind: Secret -metadata: - name: mysecret - annotations: - serving.kserve.io/s3-endpoint: minio-service:9000 # replace with your s3 endpoint - serving.kserve.io/s3-usehttps: "0" # by default 1, for testing with minio you need to set to 0 -type: Opaque -data: - AWS_ACCESS_KEY_ID: bWluaW8= - AWS_SECRET_ACCESS_KEY: bWluaW8xMjM= ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: default -secrets: - - name: mysecret - diff --git a/module-6/k8s/kafka-model-new.yaml b/module-6/k8s/kafka-model-new.yaml deleted file mode 100644 index f0f9d15..0000000 --- a/module-6/k8s/kafka-model-new.yaml +++ /dev/null @@ -1,41 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - name: kserve-custom -spec: - predictor: - minReplicas: 1 - containers: - - name: kserve-container - image: kyrylprojector/kserve-custom:latest - - transformer: - minReplicas: 1 - containers: - - image: kyrylprojector/kserve-custom-transformer:latest - name: kserve-container - resources: - limits: - cpu: 100m - memory: 1Gi - requests: - cpu: 100m - memory: 1Gi ---- -apiVersion: sources.knative.dev/v1beta1 -kind: KafkaSource -metadata: - name: kafka-source -spec: - consumerGroup: knative-group - bootstrapServers: - - kafka-headless.default.svc.cluster.local:9092 - topics: - - test - sink: - ref: - apiVersion: serving.kserve.io/v1beta1 - kind: InferenceService - name: kserve-custom - uri: /v1/models/kserve-custom:predict - diff --git a/module-6/k8s/kafka-ui-values.yml b/module-6/k8s/kafka-ui-values.yml deleted file mode 100644 index cad1b88..0000000 --- a/module-6/k8s/kafka-ui-values.yml +++ /dev/null @@ -1,11 +0,0 @@ -yamlApplicationConfig: - kafka: - clusters: - - name: yaml - bootstrapServers: kafka-headless.default.svc.cluster.local:9092 - auth: - type: disabled - management: - health: - ldap: - enabled: false diff --git a/module-6/k8s/kserve-custom.yaml b/module-6/k8s/kserve-custom.yaml deleted file mode 100644 index 09e68c0..0000000 --- a/module-6/k8s/kserve-custom.yaml +++ /dev/null @@ 
-1,9 +0,0 @@ -apiVersion: serving.kserve.io/v1beta1 -kind: InferenceService -metadata: - name: custom-model -spec: - predictor: - containers: - - name: kserve-container - image: kyrylprojector/kserve-custom:latest \ No newline at end of file diff --git a/module-6/k8s/kserve-inferenceserver-autoscaling.yaml b/module-6/k8s/kserve-inferenceserver-autoscaling.yaml new file mode 100644 index 0000000..aa5fac0 --- /dev/null +++ b/module-6/k8s/kserve-inferenceserver-autoscaling.yaml @@ -0,0 +1,18 @@ +apiVersion: serving.kserve.io/v1beta1 +kind: InferenceService +metadata: + name: custom-model-autoscaling +spec: + predictor: + scaleTarget: 1 + scaleMetric: concurrency + containers: + - name: kserve-container + image: ghcr.io/kyryl-opens-ml/app-kserve:latest + imagePullPolicy: Always + env: + - name: WANDB_API_KEY + valueFrom: + secretKeyRef: + name: wandb + key: WANDB_API_KEY \ No newline at end of file diff --git a/module-6/k8s/kserve-iris.yaml b/module-6/k8s/kserve-iris.yaml deleted file mode 100644 index 90bc825..0000000 --- a/module-6/k8s/kserve-iris.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: "serving.kserve.io/v1beta1" -kind: "InferenceService" -metadata: - name: "sklearn-iris" -spec: - predictor: - model: - modelFormat: - name: sklearn - storageUri: "gs://kfserving-examples/models/sklearn/1.0/model" diff --git a/module-6/load-testing/load_test.js b/module-6/load-testing/load_test.js new file mode 100644 index 0000000..9dc5bfa --- /dev/null +++ b/module-6/load-testing/load_test.js @@ -0,0 +1,47 @@ +import http from 'k6/http'; +import { sleep } from 'k6'; + +const movie_reviews = [ + "A rollercoaster of emotions with stunning visuals and remarkable performances. A must-see!", + "Despite its high production values, the plot is predictable and lacks originality.", + "An epic space opera that pulls you in with its intricate plot and complex characters.", + "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", + "An extraordinary cinematic experience that beautifully captures the human spirit.", + "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", + "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", + "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", + "A touching story of love and loss, paired with phenomenal acting. 
It will leave you teary-eyed.", + "The script is clichéd, and the chemistry between the lead actors feels forced.", + "A thrilling and suspenseful journey that keeps you on the edge of your seat till the end.", + "The plot twists feel contrived, and the horror elements seem more comical than scary.", + "A poignant exploration of life and love, combined with a mesmerizing soundtrack.", + "The narrative is overly sentimental and fails to deliver a strong message.", + "An underwater adventure that's both visually stunning and emotionally resonant.", + "The visual effects overshadow the story, which is lacking in depth and originality.", + "An action-packed thrill ride with memorable characters and an engaging plot.", + "The action scenes are overdone and the storyline is paper thin.", + "A captivating sci-fi thriller that challenges your perception of reality.", + "The plot is confusing and the ending leaves too many questions unanswered.", +]; + +export let options = { + vus: 10, + duration: '10m', +}; + +export default function () { + sleep(Math.random() * 4 + 1); + const num_of_review = Math.floor(Math.random() * 100) + 1; + const reviews = []; + for (let i = 0; i < num_of_review; i++) { + const random_index = Math.floor(Math.random() * movie_reviews.length); + reviews.push(movie_reviews[random_index]); + } + const payload = JSON.stringify({ text: reviews }); + const params = { + headers: { + 'Content-Type': 'application/json', + }, + }; + http.post('http://0.0.0.0:8080/predict', payload, params); +} \ No newline at end of file diff --git a/module-6/load-testing/perf.yaml b/module-6/load-testing/perf.yaml deleted file mode 100644 index f5789b0..0000000 --- a/module-6/load-testing/perf.yaml +++ /dev/null @@ -1,48 +0,0 @@ -apiVersion: batch/v1 -kind: Job -metadata: - generateName: load-test -spec: - backoffLimit: 6 - parallelism: 1 - template: - metadata: - annotations: - sidecar.istio.io/inject: "false" - spec: - restartPolicy: OnFailure - containers: - - args: - - vegeta -cpus=5 attack -duration=1m -rate=500/1s -targets=/var/vegeta/cfg - | vegeta report -type=text - command: - - sh - - -c - image: peterevans/vegeta:latest - imagePullPolicy: Always - name: vegeta - volumeMounts: - - mountPath: /var/vegeta - name: vegeta-cfg - volumes: - - configMap: - defaultMode: 420 - name: vegeta-cfg - name: vegeta-cfg ---- -apiVersion: v1 -data: - cfg: | - POST http://sklearn-iris.kserve-test.svc.cluster.local/v1/models/sklearn-iris:predict - @/var/vegeta/payload - payload: | - { - "instances": [ - [6.8, 2.8, 4.8, 1.4], - [6.0, 3.4, 4.5, 1.6] - ] - } -kind: ConfigMap -metadata: - annotations: - name: vegeta-cfg diff --git a/module-6/load-testing/vegeta-job.yaml b/module-6/load-testing/vegeta-job.yaml new file mode 100644 index 0000000..d53f845 --- /dev/null +++ b/module-6/load-testing/vegeta-job.yaml @@ -0,0 +1,56 @@ +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: vegeta-cfg +data: + cfg: | + POST http://app-fastapi.default.svc.cluster.local:8080/predict + Content-Type: application/json + @/var/vegeta/payload + payload: | + { + "text": [ + "A rollercoaster of emotions with stunning visuals and remarkable performances. 
A must-see!", + "Despite its high production values, the plot is predictable and lacks originality.", + "An epic space opera that pulls you in with its intricate plot and complex characters.", + "Too reliant on CGI, and the storyline feels disjointed and hard to follow.", + "An extraordinary cinematic experience that beautifully captures the human spirit.", + "The pacing is too slow, and it tends to feel more like a documentary than a feature film.", + "A superb adaptation with a gripping plot and fascinating characters. Truly unforgettable.", + "Though the scenery is beautiful, the characters feel flat and the storyline lacks depth.", + "A touching story of love and loss, paired with phenomenal acting. It will leave you teary-eyed.", + "The script is clichéd, and the chemistry between the lead actors feels forced." + ] + } +--- +apiVersion: batch/v1 +kind: Job +metadata: + generateName: load-test- +spec: + backoffLimit: 6 + parallelism: 1 + template: + metadata: + annotations: + sidecar.istio.io/inject: "false" + spec: + restartPolicy: OnFailure + containers: + - name: vegeta + image: peterevans/vegeta:latest + imagePullPolicy: Always + command: + - sh + - -c + args: + - vegeta -cpus=2 attack -duration=1m -rate=100/1s -targets=/var/vegeta/cfg | vegeta report -type=text + volumeMounts: + - name: vegeta-cfg + mountPath: /var/vegeta + volumes: + - name: vegeta-cfg + configMap: + name: vegeta-cfg + defaultMode: 420 \ No newline at end of file diff --git a/module-6/queue/simple_queue.py b/module-6/queue/simple_queue.py new file mode 100644 index 0000000..90f3e7a --- /dev/null +++ b/module-6/queue/simple_queue.py @@ -0,0 +1,45 @@ +from fastapi import FastAPI +from pydantic import BaseModel +import modal + +app = modal.App("simple-queue") + +@app.function() +def process_job(data): + import time + time.sleep(60) + return {"result": data} + +web_app = FastAPI() + +def submit_job(data): + process_job = modal.Function.lookup("simple-queue", "process_job") + call = process_job.spawn(data) + return call.object_id + +def get_job_result(call_id): + function_call = modal.functions.FunctionCall.from_id(call_id) + try: + result = function_call.get(timeout=0) + except modal.exception.OutputExpiredError: + result = {"result": "expired"} + except TimeoutError: + result = {"result": "pending"} + return result + +class SubmitJobRequest(BaseModel): + data: str + +@web_app.post("/submit_job") +async def submit_job_endpoint(request: SubmitJobRequest): + call_id = submit_job(request.data) + return {"call_id": call_id} + +@web_app.get("/get_job_result") +async def get_job_result_endpoint(call_id: str): + result = get_job_result(call_id) + return result + +if __name__ == "__main__": + import uvicorn + uvicorn.run(web_app, host="0.0.0.0", port=8000) \ No newline at end of file diff --git a/module-6/serving/fast_api.py b/module-6/serving/fast_api.py deleted file mode 100644 index fd97582..0000000 --- a/module-6/serving/fast_api.py +++ /dev/null @@ -1,29 +0,0 @@ -from typing import List - -from fastapi import FastAPI -from pydantic import BaseModel - -from serving.predictor import Predictor - - -class Payload(BaseModel): - text: List[str] - - -class Prediction(BaseModel): - probs: List[List[float]] - - -app = FastAPI() -predictor = Predictor.default_from_model_registry() - - -@app.get("/health_check") -def health_check() -> str: - return "ok" - - -@app.post("/predict", response_model=Prediction) -def predict(payload: Payload) -> Prediction: - prediction = predictor.predict(text=payload.text) - return 
Prediction(probs=prediction.tolist()) diff --git a/module-6/serving/kserve_api.py b/module-6/serving/kserve_api.py deleted file mode 100644 index ca388bf..0000000 --- a/module-6/serving/kserve_api.py +++ /dev/null @@ -1,40 +0,0 @@ -import argparse -import logging -from typing import Dict - -import kserve - -from serving.predictor import Predictor - -logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) - - -class CustomModel(kserve.Model): - def __init__(self, name: str): - super().__init__(name) - self.name = name - self.predictor = None - self.load() - - def load(self): - self.predictor = Predictor.default_from_model_registry() - - def predict(self, payload: Dict, headers: Dict[str, str] = None) -> Dict: - logging.info(f"Received inputs {payload}") - text = payload["instances"] - logging.info(f"Received text {text}, {type(text)}") - result = self.predictor.predict(text=text) - logging.info(f"Resutls {result}, {type(result)}") - return {"predictions": result.tolist()} - - -DEFAULT_MODEL_NAME = "kserve-custom" -parser = argparse.ArgumentParser(parents=[kserve.model_server.parser]) -parser.add_argument("--model_name", default=DEFAULT_MODEL_NAME, help="The name that the model is served under.") - -args, _ = parser.parse_known_args() - -if __name__ == "__main__": - custom_model = CustomModel(args.model_name) - server = kserve.ModelServer() - server.start(models=[custom_model]) diff --git a/module-6/serving/kserve_transformer.py b/module-6/serving/kserve_transformer.py deleted file mode 100644 index b54d8a8..0000000 --- a/module-6/serving/kserve_transformer.py +++ /dev/null @@ -1,72 +0,0 @@ -import argparse -import datetime -import json -import logging -import time -from typing import Dict, Union - -import boto3 -import kserve -from cloudevents.http import CloudEvent -from kserve import InferRequest, InferResponse -from kserve.protocol.grpc.grpc_predict_v2_pb2 import ModelInferResponse - -logging.basicConfig(level=kserve.constants.KSERVE_LOGLEVEL) - -session = boto3.Session() -client = session.client("s3", endpoint_url="http://minio-service:9000", aws_access_key_id="minio", aws_secret_access_key="minio123") -digits_bucket = "output" - - -class ImageTransformer(kserve.Model): - def __init__(self, name: str, predictor_host: str): - super().__init__(name) - self.predictor_host = predictor_host - self._key = None - - def preprocess(self, inputs: Union[Dict, CloudEvent, InferRequest], headers: Dict[str, str] = None) -> Union[Dict, InferRequest]: - - logging.info("Received inputs %s", inputs) - data = json.loads(inputs.get_data().decode("utf-8")) - inputs = data - if inputs["EventName"] == "s3:ObjectCreated:Put": - bucket = inputs["Records"][0]["s3"]["bucket"]["name"] - key = inputs["Records"][0]["s3"]["object"]["key"] - self._key = key - client.download_file(bucket, key, "/tmp/" + key) - - with open("/tmp/" + key, "r") as f: - instances = json.load(f)["instances"] - logging.info(f"instances {instances}") - - return {"instances": instances} - raise Exception("unknown event") - - def postprocess(self, response: Union[Dict, InferResponse, ModelInferResponse], headers: Dict[str, str] = None) -> Union[Dict, ModelInferResponse]: - logging.info( - f"response: {response}", - ) - predictions = response["predictions"] - logging.info(f"predictions: {predictions}") - - upload_path = f"predictions_{time.time()}-{self._key}" - with open(upload_path, "w") as f: - json.dump(predictions, f) - - client.upload_file(upload_path, digits_bucket, upload_path) - logging.info(f"Image {self._key} successfully uploaded 
to {upload_path}") - return response - - -DEFAULT_MODEL_NAME = "custom-model" - -parser = argparse.ArgumentParser(parents=[kserve.model_server.parser]) -parser.add_argument("--model_name", default=DEFAULT_MODEL_NAME, help="The name that the model is served under.") -parser.add_argument("--predictor_host", help="The URL for the model predict function", required=True) - -args, _ = parser.parse_known_args() - -if __name__ == "__main__": - transformer = ImageTransformer(args.model_name, predictor_host=args.predictor_host) - server = kserve.ModelServer() - server.start(models=[transformer]) diff --git a/module-6/serving/predictor.py b/module-6/serving/predictor.py deleted file mode 100644 index 2c5184d..0000000 --- a/module-6/serving/predictor.py +++ /dev/null @@ -1,55 +0,0 @@ -import logging -from pathlib import Path -from typing import List - -import pandas as pd -import torch -import wandb -from filelock import FileLock -from torch.nn.functional import softmax -from tqdm import tqdm -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -logger = logging.getLogger() - -MODEL_ID = "truskovskiyk/course-27-10-2023-week-3/airflow-pipeline:latest" -MODEL_PATH = "/tmp/model" -MODEL_LOCK = ".lock-file" -WANDB_KEY = "cb86168a2e8db7edb905da69307450f5e7867d66" - -def load_from_registry(model_name: str, model_path: Path): - wandb.login(key=WANDB_KEY) - with wandb.init() as run: - artifact = run.use_artifact(model_name, type="model") - artifact_dir = artifact.download(root=model_path) - print(f"{artifact_dir}") - - -class Predictor: - def __init__(self, model_load_path: str): - self.tokenizer = AutoTokenizer.from_pretrained(model_load_path) - self.model = AutoModelForSequenceClassification.from_pretrained(model_load_path) - self.model.eval() - - @torch.no_grad() - def predict(self, text: List[str]): - text_encoded = self.tokenizer.batch_encode_plus(list(text), return_tensors="pt", padding=True) - bert_outputs = self.model(**text_encoded).logits - return softmax(bert_outputs).numpy() - - @classmethod - def default_from_model_registry(cls) -> "Predictor": - with FileLock(MODEL_LOCK): - if not (Path(MODEL_PATH) / "model.safetensors").exists(): - load_from_registry(model_name=MODEL_ID, model_path=MODEL_PATH) - - return cls(model_load_path=MODEL_PATH) - - def run_inference_on_dataframe(self, df: pd.DataFrame) -> pd.DataFrame: - correct_sentence_conf = [] - for idx in tqdm(range(len(df))): - sentence = df.iloc[idx]["sentence"] - conf = self.predict([sentence]).flatten()[1] - correct_sentence_conf.append(conf) - df["correct_sentence_conf"] = correct_sentence_conf - return df