From a0c726629ed01618271bddae5326dad14668244d Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Fri, 14 Jul 2023 22:48:40 +0530 Subject: [PATCH 1/3] feat: add session affinity to k8s TS Signed-off-by: jagadeesh --- .pre-commit-config.yaml | 5 ++- kubernetes/Helm/templates/torchserve.yaml | 8 +++- kubernetes/Helm/values.yaml | 4 +- kubernetes/README.md | 50 ++++++++++++++++++++++- kubernetes/destination_rule.yaml | 13 ++++++ kubernetes/gateway.yaml | 14 +++++++ kubernetes/virtual_service.yaml | 28 +++++++++++++ ts_scripts/torchserve_grpc_client.py | 17 ++++---- 8 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 kubernetes/destination_rule.yaml create mode 100644 kubernetes/gateway.yaml create mode 100644 kubernetes/virtual_service.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec9f575678..78603bdafb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,7 @@ repos: - id: check-json - id: check-toml - id: check-yaml + args: [--allow-multiple-documents, --unsafe] - id: end-of-file-fixer - id: mixed-line-ending - id: trailing-whitespace @@ -24,12 +25,12 @@ repos: - id: python-no-log-warn - id: python-use-type-annotations - repo: https://github.com/hadialqattan/pycln - rev: v2.1.3 + rev: v2.1.5 hooks: - id: pycln args: [--all] - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.7.0 hooks: - id: black additional_dependencies: ['click==8.0.4'] diff --git a/kubernetes/Helm/templates/torchserve.yaml b/kubernetes/Helm/templates/torchserve.yaml index e847246100..f086afac12 100644 --- a/kubernetes/Helm/templates/torchserve.yaml +++ b/kubernetes/Helm/templates/torchserve.yaml @@ -12,14 +12,16 @@ spec: ports: - name: preds port: {{ .Values.torchserve.inference_port }} - targetPort: ts + targetPort: ts - name: mdl port: {{ .Values.torchserve.management_port }} targetPort: ts-management - name: metrics port: {{ .Values.torchserve.metrics_port }} targetPort: ts-metrics - type: LoadBalancer + - name: grpc 
+ port: {{ .Values.torchserve.grpc_inference_port }} + targetPort: ts-grpc selector: app: torchserve --- @@ -54,6 +56,8 @@ spec: containerPort: {{ .Values.torchserve.management_port }} - name: ts-metrics containerPort: {{ .Values.torchserve.metrics_port }} + - name: ts-grpc + containerPort: {{ .Values.torchserve.grpc_inference_port }} imagePullPolicy: IfNotPresent volumeMounts: - mountPath: {{ .Values.torchserve.pvd_mount }} diff --git a/kubernetes/Helm/values.yaml b/kubernetes/Helm/values.yaml index fb74a4277c..cd8dbc81ac 100644 --- a/kubernetes/Helm/values.yaml +++ b/kubernetes/Helm/values.yaml @@ -8,13 +8,15 @@ torchserve: management_port: 8081 inference_port: 8080 metrics_port: 8082 + grpc_inference_port: 7070 + pvd_mount: /home/model-server/shared/ n_gpu: 4 n_cpu: 16 memory_limit: 32Gi deployment: - replicas: 1 + replicas: 2 persistentVolume: name: efs-claim diff --git a/kubernetes/README.md b/kubernetes/README.md index f94f15f937..275d6395d0 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -1,5 +1,5 @@ # Torchserve on Kubernetes - + ## Overview This page demonstrates a Torchserve deployment in Kubernetes using Helm Charts. It uses the DockerHub Torchserve Image for the pods and a PersistentVolume for storing config / model files. @@ -53,6 +53,7 @@ torchserve: management_port: 8081 inference_port: 8080 metrics_port: 8082 + grpc_inference_port: 7070 pvd_mount: /home/model-server/shared/ n_gpu: 1 n_cpu: 1 @@ -66,7 +67,7 @@ persitant_volume: size: 1Gi ``` -To install Torchserve run ```helm install ts .``` +To install Torchserve run ```helm install ts .``` ```bash ubuntu@ip-172-31-50-36:~/serve/kubernetes/Helm$ helm install ts . 
@@ -283,6 +284,51 @@ Follow the link for log aggregation with EFK Stack.\ ## Autoscaling [Autoscaling with torchserve metrics](autoscale.md) +## Session Affinity with Multiple Torchserve pods + +### Pre-requisites + + - Follow the instructions above and deploy Torchserve with more than 1 replica to the kubernetes cluster + - Download Istio and add to path as shown [here](https://istio.io/latest/docs/setup/getting-started/#download) + - Install Istio with below command + - `istioctl install --set meshConfig.accessLogFile=/dev/stdout` + +### Steps + +Now we have multiple replicas of Torchserve running and istio installed. We can apply gateway, virtual service and destination rule to enable session affinity to the user requests. + + - Apply the istio gateway via `kubectl apply -f gateway.yaml` + - This gateway exposes all the host behind it via port 80 as defined in the yaml file. + - Apply the virtual service with command `kubectl apply -f virtual_service.yaml` + - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwared to port `7070` for Torchserve service. + - Apply the destination Rule using the command `kubectl apply -f destination_rule.yaml`. + - The destination rule look for a http cookie with a key `session_id`. 
The request with `session_id` is served by the same pod that served the previous request with the same `session_id` + +### HTTP Inference + +- Fetch the external IP from istio-ingress gateway using the below command + +```bash +ubuntu@ubuntu$ kubectl get svc -n istio-system +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +istio-ingressgateway LoadBalancer 10.100.84.243 a918b2zzzzzzzzzzzzzzzzzzzzzz-1466623565.us-west-2.elb.amazonaws.com 15021:32270/TCP,80:31978/TCP,443:31775/TCP,70:31778/TCP 2d6h +``` + +- Make Request as shown below + +```bash +curl -v -H "protocol: rest" --cookie "session_id="12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" +``` + +### gRPC Inference + +- Refer [grpc_api](../docs/grpc_api.md) to generate python files and run + +```bash +python ts_scripts/torchserve_grpc_client.py infer +``` + + ## Roadmap * [] Log / Metrics Aggregation using [AWS Container Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html) diff --git a/kubernetes/destination_rule.yaml b/kubernetes/destination_rule.yaml new file mode 100644 index 0000000000..b334fa4106 --- /dev/null +++ b/kubernetes/destination_rule.yaml @@ -0,0 +1,13 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: DestinationRule +metadata: + name: torchserve-dr +spec: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + trafficPolicy: + loadBalancer: + consistentHash: + # httpHeaderName: x-user + httpCookie: + name: session_id + ttl: 60s diff --git a/kubernetes/gateway.yaml b/kubernetes/gateway.yaml new file mode 100644 index 0000000000..b2ecfca23b --- /dev/null +++ b/kubernetes/gateway.yaml @@ -0,0 +1,14 @@ +apiVersion: networking.istio.io/v1beta1 +kind: Gateway +metadata: + name: torchserve-gw +spec: + selector: + istio: ingressgateway + servers: + - hosts: + - '*' + port: + name: http + number: 80 + protocol: HTTP diff --git a/kubernetes/virtual_service.yaml 
b/kubernetes/virtual_service.yaml new file mode 100644 index 0000000000..2a699e46ea --- /dev/null +++ b/kubernetes/virtual_service.yaml @@ -0,0 +1,28 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: VirtualService +metadata: + name: torchserve-vs +spec: + hosts: + - '*' + gateways: + - torchserve-gw + http: + - match: + - headers: + protocol: + exact: rest + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 8080 + - match: + - headers: + protocol: + exact: grpc + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 7070 diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py index ccf293ed3f..8e6d4a96a8 100644 --- a/ts_scripts/torchserve_grpc_client.py +++ b/ts_scripts/torchserve_grpc_client.py @@ -19,13 +19,14 @@ def get_management_stub(): return stub -def infer(stub, model_name, model_input): +def infer(stub, model_name, model_input, metadata): with open(model_input, "rb") as f: data = f.read() input_data = {"data": data} response = stub.Predictions( - inference_pb2.PredictionsRequest(model_name=model_name, input=input_data) + inference_pb2.PredictionsRequest(model_name=model_name, input=input_data), + metadata=metadata, ) try: @@ -35,13 +36,14 @@ def infer(stub, model_name, model_input): exit(1) -def infer_stream(stub, model_name, model_input): +def infer_stream(stub, model_name, model_input, metadata): with open(model_input, "rb") as f: data = f.read() input_data = {"data": data} responses = stub.StreamPredictions( - inference_pb2.PredictionsRequest(model_name=model_name, input=input_data) + inference_pb2.PredictionsRequest(model_name=model_name, input=input_data), + metadata=metadata, ) try: @@ -92,7 +94,6 @@ def unregister(stub, model_name): if __name__ == "__main__": - parent_parser = argparse.ArgumentParser(add_help=False) parent_parser.add_argument( "model_name", @@ -141,10 +142,12 @@ def unregister(stub, 
model_name): args = parser.parse_args() + metadata = (("protocol", "grpc"), ("session_id", "12345")) + if args.action == "infer": - infer(get_inference_stub(), args.model_name, args.model_input) + infer(get_inference_stub(), args.model_name, args.model_input, metadata) elif args.action == "infer_stream": - infer_stream(get_inference_stub(), args.model_name, args.model_input) + infer_stream(get_inference_stub(), args.model_name, args.model_input, metadata) elif args.action == "register": register(get_management_stub(), args.model_name, args.mar_set) elif args.action == "unregister": From 2accb19d53c3255fe1052d18bd52ae82405557ab Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Mon, 7 Aug 2023 11:52:18 +0530 Subject: [PATCH 2/3] fix spell check Signed-off-by: jagadeesh --- kubernetes/README.md | 2 +- ts_scripts/spellcheck_conf/wordlist.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kubernetes/README.md b/kubernetes/README.md index 275d6395d0..59e9b067d4 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -300,7 +300,7 @@ Now we have multiple replicas of Torchserve running and istio installed. We can - Apply the istio gateway via `kubectl apply -f gateway.yaml` - This gateway exposes all the host behind it via port 80 as defined in the yaml file. - Apply the virtual service with command `kubectl apply -f virtual_service.yaml` - - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwared to port `7070` for Torchserve service. + - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. 
If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwarded to port `7070` for Torchserve service. - Apply the destination Rule using the command `kubectl apply -f destination_rule.yaml`. - The destination rule look for a http cookie with a key `session_id`. The request with `session_id` is served by the same pod that served the previous request with the same `session_id` diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 5c5e1f4b88..5b6c7dbb06 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -1063,3 +1063,6 @@ inferentia ActionSLAM statins chatGPT +accessLogFile +istioctl +meshConfig From 2c6d096783bba0609cd6ddedf6044722abb9a2f5 Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Sat, 12 Aug 2023 09:20:28 +0530 Subject: [PATCH 3/3] fix docs Signed-off-by: jagadeesh --- kubernetes/README.md | 2 +- kubernetes/virtual_service.yaml | 38 +++++++++++++++++----------- ts_scripts/torchserve_grpc_client.py | 2 +- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/kubernetes/README.md b/kubernetes/README.md index 59e9b067d4..d737f11542 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -317,7 +317,7 @@ istio-ingressgateway LoadBalancer 10.100.84.243 a918b2zzzzzzzzzzzzzzzzzzzz - Make Request as shown below ```bash -curl -v -H "protocol: rest" --cookie "session_id="12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" +curl -v -H "protocol: REST" --cookie "session_id=12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" ``` ### gRPC Inference - Refer [grpc_api](../docs/grpc_api.md) to generate python files and run ```bash python ts_scripts/torchserve_grpc_client.py infer ``` diff --git a/kubernetes/virtual_service.yaml b/kubernetes/virtual_service.yaml index 2a699e46ea..889f6c0d22 100644 --- a/kubernetes/virtual_service.yaml +++
b/kubernetes/virtual_service.yaml @@ -4,25 +4,33 @@ metadata: name: torchserve-vs spec: hosts: - - '*' + - "*" gateways: - torchserve-gw http: - match: - - headers: - protocol: - exact: rest + - uri: + prefix: /metrics route: - - destination: - host: torchserve.default.svc.cluster.local # ..svc.cluster.local - port: - number: 8080 + - destination: + host: torchserve.default.svc.cluster.local + port: + number: 8082 - match: - - headers: - protocol: - exact: grpc + - headers: + protocol: + exact: REST route: - - destination: - host: torchserve.default.svc.cluster.local # ..svc.cluster.local - port: - number: 7070 + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 8080 + - match: + - headers: + protocol: + exact: gRPC + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 7070 diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py index 8e6d4a96a8..a1868884c1 100644 --- a/ts_scripts/torchserve_grpc_client.py +++ b/ts_scripts/torchserve_grpc_client.py @@ -142,7 +142,7 @@ def unregister(stub, model_name): args = parser.parse_args() - metadata = (("protocol", "grpc"), ("session_id", "12345")) + metadata = (("protocol", "gRPC"), ("session_id", "12345")) if args.action == "infer": infer(get_inference_stub(), args.model_name, args.model_input, metadata)