From a0c726629ed01618271bddae5326dad14668244d Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Fri, 14 Jul 2023 22:48:40 +0530 Subject: [PATCH 1/3] feat: add session affinity to k8s TS Signed-off-by: jagadeesh --- .pre-commit-config.yaml | 5 ++- kubernetes/Helm/templates/torchserve.yaml | 8 +++- kubernetes/Helm/values.yaml | 4 +- kubernetes/README.md | 50 ++++++++++++++++++++++- kubernetes/destination_rule.yaml | 13 ++++++ kubernetes/gateway.yaml | 14 +++++++ kubernetes/virtual_service.yaml | 28 +++++++++++++ ts_scripts/torchserve_grpc_client.py | 17 ++++---- 8 files changed, 125 insertions(+), 14 deletions(-) create mode 100644 kubernetes/destination_rule.yaml create mode 100644 kubernetes/gateway.yaml create mode 100644 kubernetes/virtual_service.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec9f575678..78603bdafb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -12,6 +12,7 @@ repos: - id: check-json - id: check-toml - id: check-yaml + args: [--allow-multiple-documents, --unsafe] - id: end-of-file-fixer - id: mixed-line-ending - id: trailing-whitespace @@ -24,12 +25,12 @@ repos: - id: python-no-log-warn - id: python-use-type-annotations - repo: https://github.com/hadialqattan/pycln - rev: v2.1.3 + rev: v2.1.5 hooks: - id: pycln args: [--all] - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.7.0 hooks: - id: black additional_dependencies: ['click==8.0.4'] diff --git a/kubernetes/Helm/templates/torchserve.yaml b/kubernetes/Helm/templates/torchserve.yaml index e847246100..f086afac12 100644 --- a/kubernetes/Helm/templates/torchserve.yaml +++ b/kubernetes/Helm/templates/torchserve.yaml @@ -12,14 +12,16 @@ spec: ports: - name: preds port: {{ .Values.torchserve.inference_port }} - targetPort: ts + targetPort: ts - name: mdl port: {{ .Values.torchserve.management_port }} targetPort: ts-management - name: metrics port: {{ .Values.torchserve.metrics_port }} targetPort: ts-metrics - type: LoadBalancer + - name: grpc 
+ port: {{ .Values.torchserve.grpc_inference_port }} + targetPort: ts-grpc selector: app: torchserve --- @@ -54,6 +56,8 @@ spec: containerPort: {{ .Values.torchserve.management_port }} - name: ts-metrics containerPort: {{ .Values.torchserve.metrics_port }} + - name: ts-grpc + containerPort: {{ .Values.torchserve.grpc_inference_port }} imagePullPolicy: IfNotPresent volumeMounts: - mountPath: {{ .Values.torchserve.pvd_mount }} diff --git a/kubernetes/Helm/values.yaml b/kubernetes/Helm/values.yaml index fb74a4277c..cd8dbc81ac 100644 --- a/kubernetes/Helm/values.yaml +++ b/kubernetes/Helm/values.yaml @@ -8,13 +8,15 @@ torchserve: management_port: 8081 inference_port: 8080 metrics_port: 8082 + grpc_inference_port: 7070 + pvd_mount: /home/model-server/shared/ n_gpu: 4 n_cpu: 16 memory_limit: 32Gi deployment: - replicas: 1 + replicas: 2 persistentVolume: name: efs-claim diff --git a/kubernetes/README.md b/kubernetes/README.md index f94f15f937..275d6395d0 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -1,5 +1,5 @@ # Torchserve on Kubernetes - + ## Overview This page demonstrates a Torchserve deployment in Kubernetes using Helm Charts. It uses the DockerHub Torchserve Image for the pods and a PersistentVolume for storing config / model files. @@ -53,6 +53,7 @@ torchserve: management_port: 8081 inference_port: 8080 metrics_port: 8082 + grpc_inference_port: 7070 pvd_mount: /home/model-server/shared/ n_gpu: 1 n_cpu: 1 @@ -66,7 +67,7 @@ persitant_volume: size: 1Gi ``` -To install Torchserve run ```helm install ts .``` +To install Torchserve run ```helm install ts .``` ```bash ubuntu@ip-172-31-50-36:~/serve/kubernetes/Helm$ helm install ts . 
@@ -283,6 +284,51 @@ Follow the link for log aggregation with EFK Stack.\ ## Autoscaling [Autoscaling with torchserve metrics](autoscale.md) +## Session Affinity with Multiple Torchserve pods + +### Pre-requisites + + - Follow the instructions above and deploy Torchserve with more than 1 replica to the kubernetes cluster + - Download Istio and add to path as shown [here](https://istio.io/latest/docs/setup/getting-started/#download) + - Install Istio with below command + - `istioctl install --set meshConfig.accessLogFile=/dev/stdout` + +### Steps + +Now we have multiple replicas of Torchserve running and istio installed. We can apply gateway, virtual service and destination rule to enable session affinity to the user requests. + + - Apply the istio gateway via `kubectl apply -f gateway.yaml` + - This gateway exposes all the host behind it via port 80 as defined in the yaml file. + - Apply the virtual service with command `kubectl apply -f virtual_service.yaml` + - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwared to port `7070` for Torchserve service. + - Apply the destination Rule using the command `kubectl apply -f destination_rule.yaml`. + - The destination rule look for a http cookie with a key `session_id`. 
The request with `session_id` is served by the same pod that served the previous request with the same `session_id` + +### HTTP Inference + +- Fetch the external IP from istio-ingress gateway using the below command + +```bash +ubuntu@ubuntu$ kubectl get svc -n istio-system +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +istio-ingressgateway LoadBalancer 10.100.84.243 a918b2zzzzzzzzzzzzzzzzzzzzzz-1466623565.us-west-2.elb.amazonaws.com 15021:32270/TCP,80:31978/TCP,443:31775/TCP,70:31778/TCP 2d6h +``` + +- Make Request as shown below + +```bash +curl -v -H "protocol: rest" --cookie "session_id="12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" +``` + +### gRPC Inference + +- Refer [grpc_api](../docs/grpc_api.md) to generate python files and run + +```bash +python ts_scripts/torchserve_grpc_client.py infer +``` + + ## Roadmap * [] Log / Metrics Aggregation using [AWS Container Insights](https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights.html) diff --git a/kubernetes/destination_rule.yaml b/kubernetes/destination_rule.yaml new file mode 100644 index 0000000000..b334fa4106 --- /dev/null +++ b/kubernetes/destination_rule.yaml @@ -0,0 +1,13 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: DestinationRule +metadata: + name: torchserve-dr +spec: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + trafficPolicy: + loadBalancer: + consistentHash: + # httpHeaderName: x-user + httpCookie: + name: session_id + ttl: 60s diff --git a/kubernetes/gateway.yaml b/kubernetes/gateway.yaml new file mode 100644 index 0000000000..b2ecfca23b --- /dev/null +++ b/kubernetes/gateway.yaml @@ -0,0 +1,14 @@ +apiVersion: networking.istio.io/v1beta1 +kind: Gateway +metadata: + name: torchserve-gw +spec: + selector: + istio: ingressgateway + servers: + - hosts: + - '*' + port: + name: http + number: 80 + protocol: HTTP diff --git a/kubernetes/virtual_service.yaml 
b/kubernetes/virtual_service.yaml new file mode 100644 index 0000000000..2a699e46ea --- /dev/null +++ b/kubernetes/virtual_service.yaml @@ -0,0 +1,28 @@ +apiVersion: networking.istio.io/v1alpha3 +kind: VirtualService +metadata: + name: torchserve-vs +spec: + hosts: + - '*' + gateways: + - torchserve-gw + http: + - match: + - headers: + protocol: + exact: rest + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 8080 + - match: + - headers: + protocol: + exact: grpc + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 7070 diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py index ccf293ed3f..8e6d4a96a8 100644 --- a/ts_scripts/torchserve_grpc_client.py +++ b/ts_scripts/torchserve_grpc_client.py @@ -19,13 +19,14 @@ def get_management_stub(): return stub -def infer(stub, model_name, model_input): +def infer(stub, model_name, model_input, metadata): with open(model_input, "rb") as f: data = f.read() input_data = {"data": data} response = stub.Predictions( - inference_pb2.PredictionsRequest(model_name=model_name, input=input_data) + inference_pb2.PredictionsRequest(model_name=model_name, input=input_data), + metadata=metadata, ) try: @@ -35,13 +36,14 @@ def infer(stub, model_name, model_input): exit(1) -def infer_stream(stub, model_name, model_input): +def infer_stream(stub, model_name, model_input, metadata): with open(model_input, "rb") as f: data = f.read() input_data = {"data": data} responses = stub.StreamPredictions( - inference_pb2.PredictionsRequest(model_name=model_name, input=input_data) + inference_pb2.PredictionsRequest(model_name=model_name, input=input_data), + metadata=metadata, ) try: @@ -92,7 +94,6 @@ def unregister(stub, model_name): if __name__ == "__main__": - parent_parser = argparse.ArgumentParser(add_help=False) parent_parser.add_argument( "model_name", @@ -141,10 +142,12 @@ def unregister(stub, 
model_name): args = parser.parse_args() + metadata = (("protocol", "grpc"), ("session_id", "12345")) + if args.action == "infer": - infer(get_inference_stub(), args.model_name, args.model_input) + infer(get_inference_stub(), args.model_name, args.model_input, metadata) elif args.action == "infer_stream": - infer_stream(get_inference_stub(), args.model_name, args.model_input) + infer_stream(get_inference_stub(), args.model_name, args.model_input, metadata) elif args.action == "register": register(get_management_stub(), args.model_name, args.mar_set) elif args.action == "unregister": From 2accb19d53c3255fe1052d18bd52ae82405557ab Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Mon, 7 Aug 2023 11:52:18 +0530 Subject: [PATCH 2/3] fix spell check Signed-off-by: jagadeesh --- kubernetes/README.md | 2 +- ts_scripts/spellcheck_conf/wordlist.txt | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/kubernetes/README.md b/kubernetes/README.md index 275d6395d0..59e9b067d4 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -300,7 +300,7 @@ Now we have multiple replicas of Torchserve running and istio installed. We can - Apply the istio gateway via `kubectl apply -f gateway.yaml` - This gateway exposes all the host behind it via port 80 as defined in the yaml file. - Apply the virtual service with command `kubectl apply -f virtual_service.yaml` - - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwared to port `7070` for Torchserve service. + - This with look for header named `protocol` in the incoming request and forward the request to Torchserve service. 
If the `protocol` header has a value `rest` then the request is forwarded to port `8080` of Torchserve service and if the `protocol` header has a value `grpc` then the request is forwarded to port `7070` for Torchserve service. - Apply the destination Rule using the command `kubectl apply -f destination_rule.yaml`. - The destination rule look for a http cookie with a key `session_id`. The request with `session_id` is served by the same pod that served the previous request with the same `session_id` diff --git a/ts_scripts/spellcheck_conf/wordlist.txt b/ts_scripts/spellcheck_conf/wordlist.txt index 5c5e1f4b88..5b6c7dbb06 100644 --- a/ts_scripts/spellcheck_conf/wordlist.txt +++ b/ts_scripts/spellcheck_conf/wordlist.txt @@ -1063,3 +1063,6 @@ inferentia ActionSLAM statins chatGPT +accessLogFile +istioctl +meshConfig From 2c6d096783bba0609cd6ddedf6044722abb9a2f5 Mon Sep 17 00:00:00 2001 From: jagadeesh Date: Sat, 12 Aug 2023 09:20:28 +0530 Subject: [PATCH 3/3] fix docs Signed-off-by: jagadeesh --- kubernetes/README.md | 2 +- kubernetes/virtual_service.yaml | 38 +++++++++++++++++----------- ts_scripts/torchserve_grpc_client.py | 2 +- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/kubernetes/README.md b/kubernetes/README.md index 59e9b067d4..d737f11542 100644 --- a/kubernetes/README.md +++ b/kubernetes/README.md @@ -317,7 +317,7 @@ istio-ingressgateway LoadBalancer 10.100.84.243 a918b2zzzzzzzzzzzzzzzzzzzz - Make Request as shown below ```bash -curl -v -H "protocol: rest" --cookie "session_id="12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" +curl -v -H "protocol: REST" --cookie "session_id=12345" http://a918b2d70dbddzzzzzzzzzzz49ec8cf03b-1466623565.us-west-2.elb.amazonaws.com:80/predictions/ -d "data=" ``` ### gRPC Inference - Refer [grpc_api](../docs/grpc_api.md) to generate python files and run ```bash python ts_scripts/torchserve_grpc_client.py infer ``` diff --git a/kubernetes/virtual_service.yaml b/kubernetes/virtual_service.yaml index 2a699e46ea..889f6c0d22 100644 --- a/kubernetes/virtual_service.yaml +++
b/kubernetes/virtual_service.yaml @@ -4,25 +4,33 @@ metadata: name: torchserve-vs spec: hosts: - - '*' + - "*" gateways: - torchserve-gw http: - match: - - headers: - protocol: - exact: rest + - uri: + prefix: /metrics route: - - destination: - host: torchserve.default.svc.cluster.local # ..svc.cluster.local - port: - number: 8080 + - destination: + host: torchserve.default.svc.cluster.local + port: + number: 8082 - match: - - headers: - protocol: - exact: grpc + - headers: + protocol: + exact: REST route: - - destination: - host: torchserve.default.svc.cluster.local # ..svc.cluster.local - port: - number: 7070 + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 8080 + - match: + - headers: + protocol: + exact: gRPC + route: + - destination: + host: torchserve.default.svc.cluster.local # ..svc.cluster.local + port: + number: 7070 diff --git a/ts_scripts/torchserve_grpc_client.py b/ts_scripts/torchserve_grpc_client.py index 8e6d4a96a8..a1868884c1 100644 --- a/ts_scripts/torchserve_grpc_client.py +++ b/ts_scripts/torchserve_grpc_client.py @@ -142,7 +142,7 @@ def unregister(stub, model_name): args = parser.parse_args() - metadata = (("protocol", "grpc"), ("session_id", "12345")) + metadata = (("protocol", "gRPC"), ("session_id", "12345")) if args.action == "infer": infer(get_inference_stub(), args.model_name, args.model_input, metadata)