diff --git a/ChatQnA/kubernetes/manifests/README.md b/ChatQnA/kubernetes/README.md
similarity index 100%
rename from ChatQnA/kubernetes/manifests/README.md
rename to ChatQnA/kubernetes/README.md
diff --git a/ChatQnA/kubernetes/chatQnA_gaudi.yaml b/ChatQnA/kubernetes/chatQnA_gaudi.yaml
index a8aabae95..d2b9905fe 100644
--- a/ChatQnA/kubernetes/chatQnA_gaudi.yaml
+++ b/ChatQnA/kubernetes/chatQnA_gaudi.yaml
@@ -23,6 +23,7 @@ spec:
           serviceName: embedding-svc
           config:
             endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
       - name: TeiEmbeddingGaudi
         internalService:
           serviceName: tei-embedding-gaudi-svc
@@ -33,6 +34,8 @@ spec:
           serviceName: retriever-svc
           config:
             endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc
       - name: VectorDB
         internalService:
           serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
           serviceName: reranking-svc
           config:
             endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
       - name: TeiReranking
         internalService:
           serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
           serviceName: llm-svc
           config:
             endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-gaudi-svc
       - name: TgiGaudi
         internalService:
           serviceName: tgi-gaudi-svc
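The config keys added in these manifests (`TEI_EMBEDDING_ENDPOINT`, `REDIS_URL`, `TGI_LLM_ENDPOINT`) carry internal service names rather than URLs; GMC is expected to resolve them to cluster-internal endpoints when it renders each microservice. A minimal sketch for spot-checking the resolved values on a live deployment, assuming a `chatqna` namespace and an `app=embedding-svc` pod label (both illustrative, not taken from this change):

```bash
# Spot-check the environment GMC rendered for the embedding microservice.
# NS and the label selector are assumptions; adjust to your deployment.
NS=chatqna
POD=$(kubectl get pod -n "$NS" -l app=embedding-svc -o jsonpath='{.items[0].metadata.name}')
kubectl exec "$POD" -n "$NS" -- env | grep -E 'TEI_EMBEDDING_ENDPOINT|REDIS_URL'
```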
diff --git a/ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml b/ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
new file mode 100644
index 000000000..0af8cebda
--- /dev/null
+++ b/ChatQnA/kubernetes/chatQnA_switch_gaudi.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: gaudi
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: Embedding
+        nodeName: node1
+      - name: Reranking
+        data: $response
+        internalService:
+          serviceName: reranking-svc
+          config:
+            endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
+      - name: TeiReranking
+        internalService:
+          serviceName: tei-reranking-svc
+          config:
+            endpoint: /rerank
+          isDownstreamService: true
+      - name: Llm
+        data: $response
+        nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+      - name: Embedding
+        condition: embedding-model-id==large
+        internalService:
+          serviceName: embedding-svc-large
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+      - name: Embedding
+        condition: embedding-model-id==small
+        internalService:
+          serviceName: embedding-svc-small
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+      - name: TeiEmbeddingGaudi
+        internalService:
+          serviceName: tei-embedding-gaudi-svc-bge15
+          config:
+            MODEL_ID: BAAI/bge-base-en-v1.5
+          isDownstreamService: true
+      - name: TeiEmbeddingGaudi
+        internalService:
+          serviceName: tei-embedding-gaudi-svc-bge-small
+          config:
+            MODEL_ID: BAAI/bge-small-en-v1.5
+          isDownstreamService: true
+      - name: Retriever
+        condition: embedding-model-id==large
+        data: $response
+        internalService:
+          serviceName: retriever-svc-large
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-large
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge15
+      - name: Retriever
+        condition: embedding-model-id==small
+        data: $response
+        internalService:
+          serviceName: retriever-svc-small
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-small
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-gaudi-svc-bge-small
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-large
+          isDownstreamService: true
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-small
+          isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+      - name: Llm
+        condition: model-id==intel
+        internalService:
+          serviceName: llm-svc-intel
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-gaudi-service-intel
+      - name: Llm
+        condition: model-id==llama
+        internalService:
+          serviceName: llm-svc-llama
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-gaudi-service-llama
+      - name: TgiGaudi
+        internalService:
+          serviceName: tgi-gaudi-service-intel
+          config:
+            endpoint: /generate
+            MODEL_ID: Intel/neural-chat-7b-v3-3
+          isDownstreamService: true
+      - name: TgiGaudi
+        internalService:
+          serviceName: tgi-gaudi-service-llama
+          config:
+            endpoint: /generate
+            MODEL_ID: openlm-research/open_llama_3b
+          isDownstreamService: true
diff --git a/ChatQnA/kubernetes/chatQnA_switch_xeon.yaml b/ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
new file mode 100644
index 000000000..4f06a2106
--- /dev/null
+++ b/ChatQnA/kubernetes/chatQnA_switch_xeon.yaml
@@ -0,0 +1,124 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: gmc.opea.io/v1alpha3
+kind: GMConnector
+metadata:
+  labels:
+    app.kubernetes.io/name: gmconnector
+    app.kubernetes.io/managed-by: kustomize
+    gmc/platform: xeon
+  name: switch
+  namespace: switch
+spec:
+  routerConfig:
+    name: router
+    serviceName: router-service
+  nodes:
+    root:
+      routerType: Sequence
+      steps:
+      - name: Embedding
+        nodeName: node1
+      - name: Reranking
+        data: $response
+        internalService:
+          serviceName: reranking-svc
+          config:
+            endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
+      - name: TeiReranking
+        internalService:
+          serviceName: tei-reranking-svc
+          config:
+            endpoint: /rerank
+          isDownstreamService: true
+      - name: Llm
+        data: $response
+        nodeName: node2
+    node1:
+      routerType: Switch
+      steps:
+      - name: Embedding
+        condition: embedding-model-id==large
+        internalService:
+          serviceName: embedding-svc-large
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+      - name: Embedding
+        condition: embedding-model-id==small
+        internalService:
+          serviceName: embedding-svc-small
+          config:
+            endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+      - name: TeiEmbedding
+        internalService:
+          serviceName: tei-embedding-svc-bge15
+          config:
+            MODEL_ID: BAAI/bge-base-en-v1.5
+          isDownstreamService: true
+      - name: TeiEmbedding
+        internalService:
+          serviceName: tei-embedding-svc-bge-small
+          config:
+            MODEL_ID: BAAI/bge-small-en-v1.5
+          isDownstreamService: true
+      - name: Retriever
+        condition: embedding-model-id==large
+        data: $response
+        internalService:
+          serviceName: retriever-svc-large
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-large
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15
+      - name: Retriever
+        condition: embedding-model-id==small
+        data: $response
+        internalService:
+          serviceName: retriever-svc-small
+          config:
+            endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db-small
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-large
+          isDownstreamService: true
+      - name: VectorDB
+        internalService:
+          serviceName: redis-vector-db-small
+          isDownstreamService: true
+    node2:
+      routerType: Switch
+      steps:
+      - name: Llm
+        condition: model-id==intel
+        internalService:
+          serviceName: llm-svc-intel
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-intel
+      - name: Llm
+        condition: model-id==llama
+        internalService:
+          serviceName: llm-svc-llama
+          config:
+            endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-llama
+      - name: Tgi
+        internalService:
+          serviceName: tgi-service-intel
+          config:
+            endpoint: /generate
+            MODEL_ID: Intel/neural-chat-7b-v3-3
+          isDownstreamService: true
+      - name: Tgi
+        internalService:
+          serviceName: tgi-service-llama
+          config:
+            endpoint: /generate
+            MODEL_ID: bigscience/bloom-560m
+          isDownstreamService: true
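In the two Switch pipelines above, each step with a `condition` fires only when the matching key in the request body equals the value after `==` (for example `embedding-model-id==small`). A sketch of a request exercising the small-embedding and llama branches, assuming the router reads these keys from the JSON payload and using the `switch` name and namespace declared in the manifests:

```bash
# Fetch the pipeline's accessUrl (GMC name and namespace are both "switch"),
# then select branches via the embedding-model-id / model-id fields.
# The payload keys are assumptions inferred from the step conditions.
ACCESS_URL=$(kubectl get gmc switch -n switch -o jsonpath='{.status.accessUrl}')
curl "$ACCESS_URL" -X POST -H 'Content-Type: application/json' \
  -d '{"text": "What is the total revenue of Nike in 2023?", "embedding-model-id": "small", "model-id": "llama"}'
```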
diff --git a/ChatQnA/kubernetes/chatQnA_xeon.yaml b/ChatQnA/kubernetes/chatQnA_xeon.yaml
index 4d91ce59b..ff7cce725 100644
--- a/ChatQnA/kubernetes/chatQnA_xeon.yaml
+++ b/ChatQnA/kubernetes/chatQnA_xeon.yaml
@@ -23,6 +23,7 @@ spec:
           serviceName: embedding-svc
           config:
             endpoint: /v1/embeddings
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
       - name: TeiEmbedding
         internalService:
           serviceName: tei-embedding-svc
@@ -33,6 +34,8 @@ spec:
           serviceName: retriever-svc
           config:
             endpoint: /v1/retrieval
+            REDIS_URL: redis-vector-db
+            TEI_EMBEDDING_ENDPOINT: tei-embedding-svc
       - name: VectorDB
         internalService:
           serviceName: redis-vector-db
@@ -43,6 +46,7 @@ spec:
           serviceName: reranking-svc
           config:
             endpoint: /v1/reranking
+            TEI_RERANKING_ENDPOINT: tei-reranking-svc
       - name: TeiReranking
         internalService:
           serviceName: tei-reranking-svc
@@ -55,6 +59,7 @@ spec:
           serviceName: llm-svc
           config:
             endpoint: /v1/chat/completions
+            TGI_LLM_ENDPOINT: tgi-service-m
       - name: Tgi
         internalService:
           serviceName: tgi-service-m
diff --git a/ChatQnA/tests/test_gmc_on_gaudi.sh b/ChatQnA/tests/test_gmc_on_gaudi.sh
index 6dbfd677f..7e09dcf1d 100755
--- a/ChatQnA/tests/test_gmc_on_gaudi.sh
+++ b/ChatQnA/tests/test_gmc_on_gaudi.sh
@@ -64,7 +64,7 @@ function validate_chatqna() {
     echo "Checking response results, make sure the output is reasonable. "
     local status=false
     if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-       [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+       [[ $(grep -c '\[DONE\]' $LOG_PATH/curl_chatqna.log) != 0 ]]; then
         status=true
     fi
     if [ $status == false ]; then
diff --git a/ChatQnA/tests/test_gmc_on_xeon.sh b/ChatQnA/tests/test_gmc_on_xeon.sh
index 1fe53de88..132f62999 100755
--- a/ChatQnA/tests/test_gmc_on_xeon.sh
+++ b/ChatQnA/tests/test_gmc_on_xeon.sh
@@ -65,7 +65,7 @@ function validate_chatqna() {
     echo "Checking response results, make sure the output is reasonable. "
     local status=false
     if [[ -f $LOG_PATH/curl_chatqna.log ]] && \
-       [[ $(grep -c "billion" $LOG_PATH/curl_chatqna.log) != 0 ]]; then
+       [[ $(grep -c '\[DONE\]' $LOG_PATH/curl_chatqna.log) != 0 ]]; then
         status=true
     fi
     if [ $status == false ]; then
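Note the escaping in the new grep pattern: unescaped, `[DONE]` is a POSIX bracket expression matching any single `D`, `O`, `N`, or `E`, so it would count almost any line of output; `\[DONE\]` matches the literal end-of-stream marker. A quick demonstration:

```bash
# grep -c counts matching lines, not matches.
echo 'NO BRACKETS HERE' | grep -c "[DONE]"    # 1: bracket expression matches single letters
echo 'NO BRACKETS HERE' | grep -c '\[DONE\]'  # 0: no literal [DONE] token on the line
echo 'data: [DONE]'     | grep -c '\[DONE\]'  # 1: literal match
```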

diff --git a/CodeGen/kubernetes/README.md b/CodeGen/kubernetes/README.md
new file mode 100644
index 000000000..09e7d81a7
--- /dev/null
+++ b/CodeGen/kubernetes/README.md
@@ -0,0 +1,40 @@
+# Deploy CodeGen in a Kubernetes Cluster
+
+This document outlines the deployment process for a Code Generation (CodeGen) application that utilizes the [GenAIComps](https://github.com/opea-project/GenAIComps.git) microservice components on Intel Xeon servers and Gaudi machines.
+
+Please install GMC in your Kubernetes cluster, if you have not already done so, by following the steps in Section "Getting Started" at [GMC Install](https://github.com/opea-project/GenAIInfra/tree/main/microservices-connector#readme). We will soon publish the images to Docker Hub; at that point no builds will be required, further simplifying installation.
+
+If you have only Intel Xeon machines, use the `codegen_xeon.yaml` file; if you have a Gaudi cluster, use `codegen_gaudi.yaml`.
+The example below illustrates deployment on Xeon.
+
+## Deploy the CodeGen application
+
+1. Create the desired namespace if it does not already exist and deploy the application:
+```bash
+export APP_NAMESPACE=CT
+kubectl create ns $APP_NAMESPACE
+sed -i "s|namespace: codegen|namespace: $APP_NAMESPACE|g" ./codegen_xeon.yaml
+kubectl apply -f ./codegen_xeon.yaml
+```
+
+2. Check if the application is up and ready:
+```bash
+kubectl get pods -n $APP_NAMESPACE
+```
+
+3. Deploy a client pod for testing:
+```bash
+kubectl create deployment client-test -n $APP_NAMESPACE --image=python:3.8.13 -- sleep infinity
+```
+
+4. Check that the client pod is ready:
+```bash
+kubectl get pods -n $APP_NAMESPACE
+```
+
+5. Send a request to the application:
+```bash
+export CLIENT_POD=$(kubectl get pod -n $APP_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name})
+export accessUrl=$(kubectl get gmc -n $APP_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='codegen')].status.accessUrl}")
+kubectl exec "$CLIENT_POD" -n $APP_NAMESPACE -- curl $accessUrl -X POST -d '{"query": "def print_hello_world():"}' -H 'Content-Type: application/json'
+```
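The readiness checks in steps 2 and 4 of the README can also be scripted rather than checked by eye. A small sketch using `kubectl wait`; the timeouts are assumptions:

```bash
# Block until all application pods and the client pod report Ready.
kubectl wait --for=condition=Ready pod --all -n $APP_NAMESPACE --timeout=600s
kubectl wait --for=condition=Ready pod -l app=client-test -n $APP_NAMESPACE --timeout=120s
```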
" local status=false if [[ -f $LOG_PATH/gmc_codegen.log ]] && \ - [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then + [[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then status=true fi if [ $status == false ]; then diff --git a/CodeGen/tests/test_gmc_on_xeon.sh b/CodeGen/tests/test_gmc_on_xeon.sh index e9496e5f9..ae3140564 100755 --- a/CodeGen/tests/test_gmc_on_xeon.sh +++ b/CodeGen/tests/test_gmc_on_xeon.sh @@ -44,7 +44,7 @@ function validate_codegen() { echo "Checking response results, make sure the output is reasonable. " local status=false if [[ -f $LOG_PATH/gmc_codegen.log ]] && \ - [[ $(grep -c "print" $LOG_PATH/gmc_codegen.log) != 0 ]]; then + [[ $(grep -c "[DONE]" $LOG_PATH/gmc_codegen.log) != 0 ]]; then status=true fi if [ $status == false ]; then diff --git a/CodeTrans/kubernetes/codetrans_gaudi.yaml b/CodeTrans/kubernetes/codetrans_gaudi.yaml index 77ba13123..5bc1bd5e2 100644 --- a/CodeTrans/kubernetes/codetrans_gaudi.yaml +++ b/CodeTrans/kubernetes/codetrans_gaudi.yaml @@ -24,10 +24,11 @@ spec: serviceName: codetrans-service config: endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-gaudi-svc - name: TgiGaudi internalService: serviceName: tgi-gaudi-svc config: - LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: HuggingFaceH4/mistral-7b-grok endpoint: /generate isDownstreamService: true diff --git a/CodeTrans/kubernetes/codetrans_xeon.yaml b/CodeTrans/kubernetes/codetrans_xeon.yaml index b94aa72ca..889a1d21a 100644 --- a/CodeTrans/kubernetes/codetrans_xeon.yaml +++ b/CodeTrans/kubernetes/codetrans_xeon.yaml @@ -24,10 +24,11 @@ spec: serviceName: codetrans-service config: endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-service - name: Tgi internalService: serviceName: tgi-service config: - LLM_MODEL_ID: HuggingFaceH4/mistral-7b-grok + MODEL_ID: HuggingFaceH4/mistral-7b-grok endpoint: /generate isDownstreamService: true diff --git a/DocSum/kubernetes/docsum_gaudi.yaml b/DocSum/kubernetes/docsum_gaudi.yaml index 7a3755c23..9b7a1ef30 100644 --- a/DocSum/kubernetes/docsum_gaudi.yaml +++ b/DocSum/kubernetes/docsum_gaudi.yaml @@ -18,13 +18,14 @@ spec: root: routerType: Sequence steps: - - name: DocSumGaudi + - name: DocSum data: $response internalService: serviceName: docsum-llm-uservice config: endpoint: /v1/chat/docsum PORT: "9009" + TGI_LLM_ENDPOINT: tgi-gaudi-svc - name: TgiGaudi internalService: serviceName: tgi-gaudi-svc diff --git a/DocSum/kubernetes/docsum_xeon.yaml b/DocSum/kubernetes/docsum_xeon.yaml index c07f03e6f..09a72e0f1 100644 --- a/DocSum/kubernetes/docsum_xeon.yaml +++ b/DocSum/kubernetes/docsum_xeon.yaml @@ -25,6 +25,7 @@ spec: config: endpoint: /v1/chat/docsum PORT: "9009" + TGI_LLM_ENDPOINT: tgi-svc - name: Tgi internalService: serviceName: tgi-svc