From 3eaf45f00c094176277f72975082b61e2e8d7f80 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Sun, 8 Sep 2024 20:46:32 -0700 Subject: [PATCH 01/45] add no_wrapper code --- .../single_gaudi/chatqna_config_map.yaml | 26 ++++++ .../chatqna_mega_service_run.yaml | 55 ++++++++++++ .../dataprep-microservice_run.yaml | 75 ++++++++++++++++ .../embedding-dependency_run.yaml | 62 +++++++++++++ .../single_gaudi/llm-dependency_run.yaml | 88 +++++++++++++++++++ .../reranking-dependency_run.yaml | 85 ++++++++++++++++++ .../retrieval-microservice_run.yaml | 72 +++++++++++++++ .../single_gaudi/vector-db_run.yaml | 48 ++++++++++ 8 files changed, 511 insertions(+) create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml new file mode 100644 index 000000000..309fc6a74 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml @@ -0,0 +1,26 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct # Intel/neural-chat-7b-v3-3 + 
TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: rerank-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml new file mode 100644 index 000000000..07658ff21 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml @@ -0,0 +1,55 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna_no_wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: 
chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml new file mode 100644 index 000000000..4c71df7ce --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml @@ -0,0 +1,75 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml 
b/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml new file mode 100644 index 000000000..42a20871d --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml @@ -0,0 +1,62 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml new file mode 100644 index 000000000..093d2264b --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml @@ -0,0 +1,88 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + 
metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml new file mode 100644 index 000000000..af908ecd1 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml @@ -0,0 +1,85 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml new file mode 100644 index 000000000..ac6c12fdc --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml @@ -0,0 +1,72 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml new file mode 100644 index 000000000..e04e8c5fe --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml @@ -0,0 +1,48 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 
8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 From 31f28f6ddac867d25cd4ceea532233658d876f79 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Sun, 8 Sep 2024 23:07:08 -0700 Subject: [PATCH 02/45] fix name --- .../benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml index 309fc6a74..c7dcaf06d 100644 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml @@ -20,7 +20,7 @@ data: EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: rerank-dependency-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc NODE_SELECTOR: chatqna-opea # LLM_SERVICE_HOST_IP: llm-svc LLM_SERVER_HOST_IP: llm-dependency-svc From 9879f3c87d6964c14cb7136fc2dbb31b6e89c32b Mon Sep 17 00:00:00 2001 From: Spycsh Date: Mon, 9 Sep 2024 00:11:24 -0700 Subject: [PATCH 03/45] merge to one --- .../single_gaudi/chatqna_config_map.yaml | 26 - .../chatqna_mega_service_run.yaml | 55 -- .../dataprep-microservice_run.yaml | 75 --- .../embedding-dependency_run.yaml | 62 -- .../single_gaudi/llm-dependency_run.yaml | 88 --- .../no_wrapper_one_gaudi_with_rerank.yaml | 530 ++++++++++++++++++ .../reranking-dependency_run.yaml | 85 --- .../retrieval-microservice_run.yaml | 72 --- .../single_gaudi/vector-db_run.yaml | 48 -- 9 files changed, 530 insertions(+), 511 deletions(-) delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml delete mode 100644 
ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml delete mode 100644 ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml deleted file mode 100644 index c7dcaf06d..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_config_map.yaml +++ /dev/null @@ -1,26 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct # Intel/neural-chat-7b-v3-3 - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # 
LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml deleted file mode 100644 index 07658ff21..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/chatqna_mega_service_run.yaml +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna_no_wrapper:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - args: null - ports: - - containerPort: 8888 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: chatqna-backend-server-svc -spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy - ports: - - name: service - port: 8888 - targetPort: 8888 - nodePort: 30888 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml deleted file mode 100644 index 4c71df7ce..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/dataprep-microservice_run.yaml +++ /dev/null @@ -1,75 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: 
dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true - containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - args: null - ports: - - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: dataprep-svc -spec: - type: ClusterIP - selector: - app: dataprep-deploy - ports: - - name: port1 - port: 6007 - targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml deleted file mode 100644 index 42a20871d..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/embedding-dependency_run.yaml +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - 
spec: - nodeSelector: - node-type: chatqna-opea - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: embedding-dependency-svc -spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy - ports: - - name: service - port: 6006 - targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml deleted file mode 100644 index 093d2264b..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/llm-dependency_run.yaml +++ /dev/null @@ -1,88 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true - containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '2048' - - --max-total-tokens - - '4096' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - 
mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml new file mode 100644 index 000000000..c42deb078 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml @@ -0,0 +1,530 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct # Intel/neural-chat-7b-v3-3 + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: 
rerank-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna_no_wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + 
configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + 
spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - 
--auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + 
serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- + + diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml deleted file mode 100644 index af908ecd1..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/reranking-dependency_run.yaml +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - 
envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: reranking-dependency-svc -spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy - ports: - - name: service - port: 8808 - targetPort: 80 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml deleted file mode 100644 index ac6c12fdc..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/retrieval-microservice_run.yaml +++ /dev/null @@ -1,72 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true - containers: - - env: - - 
name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - args: null - ports: - - containerPort: 7000 - serviceAccountName: default ---- -kind: Service -apiVersion: v1 -metadata: - name: retriever-svc -spec: - type: ClusterIP - selector: - app: retriever-deploy - ports: - - name: service - port: 7000 - targetPort: 7000 diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml deleted file mode 100644 index e04e8c5fe..000000000 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/vector-db_run.yaml +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - labels: - app: vector-db - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db - containers: - - name: vector-db - image: redis/redis-stack:7.2.0-v9 - ports: - - containerPort: 6379 - - containerPort: 8001 ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db -spec: - type: ClusterIP - selector: - app: vector-db - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 From 7e0199c41201dcb7e4119808011f106ae54198a0 Mon Sep 17 00:00:00 2001 From: 
"pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Sep 2024 07:12:57 +0000 Subject: [PATCH 04/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml index c42deb078..3877df06c 100644 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml @@ -526,5 +526,3 @@ spec: --- - - From 16f35e97a8cc10ed6afd0f2e2746efc8f8ecd0b9 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Mon, 9 Sep 2024 19:04:30 -0700 Subject: [PATCH 05/45] revert to nc 7b --- .../single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml index c42deb078..45a6df8c9 100644 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml @@ -9,7 +9,7 @@ metadata: data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct # Intel/neural-chat-7b-v3-3 + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 From bee736b396851d782e0cf82df7fc74e06a337e91 Mon Sep 17 00:00:00 2001 
From: Spycsh Date: Mon, 9 Sep 2024 19:48:57 -0700 Subject: [PATCH 06/45] revert to nc 7b --- .../single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml index 7dbb59830..01d6e9c62 100644 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml @@ -60,7 +60,7 @@ spec: - envFrom: - configMapRef: name: qna-config - image: opea/chatqna_no_wrapper:latest + image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy args: null From 778b263ff5effeff2b90fe2f7e325e25b35ea119 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Mon, 9 Sep 2024 22:55:30 -0700 Subject: [PATCH 07/45] fix --- .../single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml index 01d6e9c62..760f41845 100644 --- a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml @@ -20,7 +20,7 @@ data: EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: rerank-dependency-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc NODE_SELECTOR: chatqna-opea # LLM_SERVICE_HOST_IP: llm-svc LLM_SERVER_HOST_IP: llm-dependency-svc From 7816ba9085a309b764a5572327d744b5bec4ceba Mon Sep 17 00:00:00 2001 From: Spycsh Date: Tue, 10 Sep 2024 02:35:16 -0700 Subject: [PATCH 08/45] add no rerank manifest dockerfile --- 
.../Dockerfile_no_wrapper_without_rerank | 34 +++++++++++++++++++ ChatQnA/docker/chatqna_no_wrapper.py | 12 +++++-- ChatQnA/docker/docker_build_compose.yaml | 5 +++ 3 files changed, 49 insertions(+), 2 deletions(-) create mode 100644 ChatQnA/docker/Dockerfile_no_wrapper_without_rerank diff --git a/ChatQnA/docker/Dockerfile_no_wrapper_without_rerank b/ChatQnA/docker/Dockerfile_no_wrapper_without_rerank new file mode 100644 index 000000000..0d6c1e34e --- /dev/null +++ b/ChatQnA/docker/Dockerfile_no_wrapper_without_rerank @@ -0,0 +1,34 @@ + + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +FROM python:3.11-slim + +RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \ + libgl1-mesa-glx \ + libjemalloc-dev \ + vim \ + git + +RUN useradd -m -s /bin/bash user && \ + mkdir -p /home/user && \ + chown -R user /home/user/ + +WORKDIR /home/user/ +RUN git clone https://github.com/opea-project/GenAIComps.git + +WORKDIR /home/user/GenAIComps +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r /home/user/GenAIComps/requirements.txt && \ + pip install --no-cache-dir langchain_core + +COPY ./chatqna_no_wrapper.py /home/user/chatqna_no_wrapper.py + +ENV PYTHONPATH=$PYTHONPATH:/home/user/GenAIComps + +USER user + +WORKDIR /home/user + +ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"] diff --git a/ChatQnA/docker/chatqna_no_wrapper.py b/ChatQnA/docker/chatqna_no_wrapper.py index a2d007999..d4c29c0b4 100644 --- a/ChatQnA/docker/chatqna_no_wrapper.py +++ b/ChatQnA/docker/chatqna_no_wrapper.py @@ -7,6 +7,7 @@ from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType from langchain_core.prompts import PromptTemplate +import argparse class ChatTemplate: @@ -260,6 +261,13 @@ def add_remote_service_without_rerank(self): if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--without-rerank", 
action='store_true') + + args = parser.parse_args() + chatqna = ChatQnAService(host=MEGA_SERVICE_HOST_IP, port=MEGA_SERVICE_PORT) - chatqna.add_remote_service() - # chatqna.add_remote_service_without_rerank() + if args.without_rerank: + chatqna.add_remote_service_without_rerank() + else: + chatqna.add_remote_service() diff --git a/ChatQnA/docker/docker_build_compose.yaml b/ChatQnA/docker/docker_build_compose.yaml index 0d9200653..c5dc21f69 100644 --- a/ChatQnA/docker/docker_build_compose.yaml +++ b/ChatQnA/docker/docker_build_compose.yaml @@ -25,6 +25,11 @@ services: dockerfile: ./Dockerfile_no_wrapper extends: chatqna image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest} + chatqna-no-wrapper-without-rerank: + build: + dockerfile: ./Dockerfile_no_wrapper_without_rerank + extends: chatqna + image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest} chatqna-ui: build: context: ui From d723f064d98c6ec8eaf0afc2f301d4250d684162 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:36:49 +0000 Subject: [PATCH 09/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ChatQnA/docker/chatqna_no_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/docker/chatqna_no_wrapper.py b/ChatQnA/docker/chatqna_no_wrapper.py index d4c29c0b4..943a7d35c 100644 --- a/ChatQnA/docker/chatqna_no_wrapper.py +++ b/ChatQnA/docker/chatqna_no_wrapper.py @@ -1,13 +1,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import argparse import json import os import re from comps import ChatQnAGateway, MicroService, ServiceOrchestrator, ServiceType from langchain_core.prompts import PromptTemplate -import argparse class ChatTemplate: From 2cbc01f40ec9d3ad92334141bd8bd904be6e871c Mon Sep 17 00:00:00 2001 From: Spycsh Date: Tue, 10 Sep 2024 19:31:14 -0700 Subject: [PATCH 10/45] add cross node 
yaml --- .../no_wrapper_four_gaudi_with_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_single_gaudi_with_rerank.yaml} | 0 .../no_wrapper_two_gaudi_with_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_four_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ ...o_wrapper_single_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_two_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ 6 files changed, 2640 insertions(+) create mode 100644 ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml rename ChatQnA/benchmark/no_wrapper/{single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml => with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml} (100%) create mode 100644 ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml new file mode 100644 index 000000000..a6c5363f2 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + 
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: 
dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service 
+apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + 
name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/single_gaudi/no_wrapper_one_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml diff --git 
a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml new file mode 100644 index 000000000..87b4fa548 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + 
hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + 
matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: 
PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + 
name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - 
containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml new file mode 100644 index 000000000..02111fb3a --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + 
selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - 
'4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: 
HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + 
template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml new file mode 100644 index 000000000..7d8611884 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: 
reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + 
valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: 
llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - 
$(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - 
containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml new file mode 100644 index 000000000..a32396145 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: 
http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - 
maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: 
embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + 
nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + 
name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- From 475be764aca4750bc9c8f7a552760dda98219d63 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Wed, 11 Sep 2024 19:31:22 -0700 Subject: [PATCH 11/45] fix --- ChatQnA/chatqna_no_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py index 943a7d35c..a521f0275 100644 --- a/ChatQnA/chatqna_no_wrapper.py +++ b/ChatQnA/chatqna_no_wrapper.py @@ -261,7 +261,7 @@ def add_remote_service_without_rerank(self): if __name__ == "__main__": - parser = argparse.ArgumentParser()action='store_true', + parser = 
argparse.ArgumentParser() parser.add_argument("--without-rerank", action='store_true') args = parser.parse_args() From e3755b6675f70e47a8203d8d6e4280047ad9bdbb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 02:31:40 +0000 Subject: [PATCH 12/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ChatQnA/chatqna_no_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py index a521f0275..5d5560357 100644 --- a/ChatQnA/chatqna_no_wrapper.py +++ b/ChatQnA/chatqna_no_wrapper.py @@ -262,7 +262,7 @@ def add_remote_service_without_rerank(self): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("--without-rerank", action='store_true') + parser.add_argument("--without-rerank", action="store_true") args = parser.parse_args() From 93a9496b2b036d9d824f239d361765238685bbd6 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 11 Sep 2024 19:35:12 -0700 Subject: [PATCH 13/45] added tuned folder --- .../no_wrapper_four_gaudi_with_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_single_gaudi_with_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_two_gaudi_with_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_four_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ ...o_wrapper_single_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ .../no_wrapper_two_gaudi_without_rerank.yaml | 528 ++++++++++++++++++ 6 files changed, 3168 insertions(+) create mode 100644 ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml create mode 100644 
ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml create mode 100644 ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml new file mode 100644 index 000000000..a6c5363f2 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: 
chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 
+metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + 
- --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 
'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 
+ selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml new file mode 100644 index 000000000..760f41845 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: 
reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT 
+ - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + 
labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - 
--model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: 
+ - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml new file mode 100644 index 000000000..87b4fa548 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: 
http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + 
topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: 
embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + 
nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + 
name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml new file mode 100644 index 000000000..02111fb3a --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + 
EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - 
mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 31 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + 
targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + 
topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml new file mode 100644 index 000000000..7d8611884 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + 
name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + 
template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - 
name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: 
ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- 
+apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- diff --git a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml new file mode 100644 index 000000000..a32396145 --- /dev/null +++ b/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml @@ -0,0 +1,528 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + RERANK_MODEL_ID: BAAI/bge-reranker-base + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + INDEX_NAME: rag-redis + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + # EMBEDDING_SERVICE_HOST_IP: embedding-svc + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + # RERANK_SERVICE_HOST_IP: reranking-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc + NODE_SELECTOR: chatqna-opea + # LLM_SERVICE_HOST_IP: llm-svc + LLM_SERVER_HOST_IP: llm-dependency-svc + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 
'true' + labels: + app: chatqna-backend-server-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + args: null + ports: + - containerPort: 8888 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: chatqna-backend-server-svc +spec: + type: NodePort + selector: + app: chatqna-backend-server-deploy + ports: + - name: service + port: 8888 + targetPort: 8888 + nodePort: 30888 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: dataprep-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + args: null + ports: + - containerPort: 6007 + - containerPort: 6008 + - containerPort: 6009 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: dataprep-svc +spec: + type: ClusterIP + selector: + app: dataprep-deploy + 
ports: + - name: port1 + port: 6007 + targetPort: 6007 + - name: port2 + port: 6008 + targetPort: 6008 + - name: port3 + port: 6009 + targetPort: 6009 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + name: embedding-dependency-deploy + args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: embedding-dependency-svc +spec: + type: ClusterIP + selector: + app: embedding-dependency-deploy + ports: + - name: service + port: 6006 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 15 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + hostIPC: true + containers: + - envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.1 + name: llm-dependency-deploy-demo + securityContext: + capabilities: + add: + - SYS_NICE + args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - 
--max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: llm-dependency-svc +spec: + type: ClusterIP + selector: + app: llm-dependency-deploy + ports: + - name: service + port: 9009 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: reranking-dependency-deploy + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + name: reranking-dependency-deploy + args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: 
HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + serviceAccountName: default + volumes: + - name: model-volume + hostPath: + path: /mnt/models + type: Directory + - name: shm + emptyDir: + medium: Memory + sizeLimit: 1Gi +--- +kind: Service +apiVersion: v1 +metadata: + name: reranking-dependency-svc +spec: + type: ClusterIP + selector: + app: reranking-dependency-deploy + ports: + - name: service + port: 8808 + targetPort: 80 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: retriever-deploy + hostIPC: true + containers: + - env: + - name: REDIS_URL + valueFrom: + configMapKeyRef: + name: qna-config + key: REDIS_URL + - name: TEI_EMBEDDING_ENDPOINT + valueFrom: + configMapKeyRef: + name: qna-config + key: TEI_EMBEDDING_ENDPOINT + - name: HUGGINGFACEHUB_API_TOKEN + valueFrom: + configMapKeyRef: + name: qna-config + key: HUGGINGFACEHUB_API_TOKEN + - name: INDEX_NAME + valueFrom: + configMapKeyRef: + name: qna-config + key: INDEX_NAME + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + args: null + ports: + - containerPort: 7000 + serviceAccountName: default +--- +kind: Service +apiVersion: v1 +metadata: + name: retriever-svc +spec: + type: ClusterIP + selector: + app: retriever-deploy + ports: + - name: service + port: 7000 + targetPort: 7000 + + +--- + + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + 
template: + metadata: + labels: + app: vector-db + spec: + nodeSelector: + node-type: chatqna-opea + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + app: vector-db + containers: + - name: vector-db + image: redis/redis-stack:7.2.0-v9 + ports: + - containerPort: 6379 + - containerPort: 8001 +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db +spec: + type: ClusterIP + selector: + app: vector-db + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + + +--- From 686d5d901036a908bf815ad68a570edffe1a277f Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 11 Sep 2024 22:28:41 -0700 Subject: [PATCH 14/45] update OOB no wrapper with rerank manifests --- .../no_wrapper_four_gaudi_with_rerank.yaml | 426 ++++++++---------- .../no_wrapper_single_gaudi_with_rerank.yaml | 426 ++++++++---------- .../no_wrapper_two_gaudi_with_rerank.yaml | 426 ++++++++---------- 3 files changed, 576 insertions(+), 702 deletions(-) diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index a6c5363f2..c18ae9127 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + 
REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVICE_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- 
-kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: 
chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: 
OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,34 +235,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -334,35 +307,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: 
OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -376,34 +325,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +393,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - 
key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +440,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +480,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff 
--git a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index 760f41845..4ad7bac86 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVICE_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + 
name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: 
IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP 
- selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,34 +235,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: 
llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -334,35 +307,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -376,34 +325,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: 
service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +393,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +440,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - 
whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +480,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 87b4fa548..e2a436ff5 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVICE_HOST_IP: reranking-dependency-svc + 
RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 
targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: 
IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,34 +235,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: 
llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -334,35 +307,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -376,34 +325,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: 
reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +393,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea 
serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +440,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +480,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From 8a67e36a4d703beaeaa4b5deba8f86f8d52a00ee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 05:28:59 +0000 Subject: [PATCH 15/45] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 3 +++ .../single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml | 3 +++ .../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index c18ae9127..9e076e120 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index 4ad7bac86..bc278dd41 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index e2a436ff5..29869e048 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + 
apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From ae66a65500e3e8301b3c46597104a3b163bf9938 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 11 Sep 2024 23:15:15 -0700 Subject: [PATCH 16/45] SERVICE->SERVR --- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 7 ++----- .../single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml | 7 ++----- .../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 7 ++----- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index 9e076e120..094f403d0 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 @@ -8,11 +5,11 @@ data: HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-dependency-svc + LLM_SERVER_HOST_IP: llm-dependency-svc NODE_SELECTOR: chatqna-opea REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-dependency-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index bc278dd41..c0417fe40 100644 --- 
a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 @@ -8,11 +5,11 @@ data: HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-dependency-svc + LLM_SERVER_HOST_IP: llm-dependency-svc NODE_SELECTOR: chatqna-opea REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-dependency-svc + RERANK_SERVER_HOST_IP: reranking-dependency-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 29869e048..7db1dd57c 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 @@ -8,11 +5,11 @@ data: HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-dependency-svc + LLM_SERVER_HOST_IP: llm-dependency-svc NODE_SELECTOR: chatqna-opea REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-dependency-svc + RERANK_SERVER_HOST_IP: 
reranking-dependency-svc RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 From e29a70288886da7f33e2fcc7b3a09bdd1c169d91 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:15:53 +0000 Subject: [PATCH 17/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 3 +++ .../single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml | 3 +++ .../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index 094f403d0..f95078778 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index c0417fe40..fcf1bd424 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git 
a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 7db1dd57c..56fbf194c 100644 --- a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 2dfe87889ac29c43d848829610136996af379cd3 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Wed, 11 Sep 2024 23:30:59 -0700 Subject: [PATCH 18/45] updated without_rerank manifests --- .../no_wrapper_four_gaudi_without_rerank.yaml | 430 ++++++------------ ...o_wrapper_single_gaudi_without_rerank.yaml | 430 ++++++------------ .../no_wrapper_two_gaudi_without_rerank.yaml | 430 ++++++------------ 3 files changed, 453 insertions(+), 837 deletions(-) diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml index 02111fb3a..4061fcb4b 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 
RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service 
metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - 
--model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,121 +235,61 @@ spec: 
value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + 
topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +307,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: 
retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +354,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +394,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml index 7d8611884..e90509a83 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: 
opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: 
default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: 
llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,121 +235,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: 
OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +307,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - 
key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +354,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +394,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff 
--git a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml index a32396145..7537e8fc4 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + 
name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,35 @@ spec: image: opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest 
imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - 
type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,20 +212,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -268,16 +224,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,121 +235,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - 
configMapRef: name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +307,46 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: 
retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +354,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - 
labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +394,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From 2ae178776ec7c3cd521a838737f1cc560131b32b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 06:31:18 +0000 Subject: [PATCH 19/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml | 3 +++ .../single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml | 3 +++ .../two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml index 4061fcb4b..e12e0d44d 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml index e90509a83..a5529c1a2 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml +++ 
b/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml index 7537e8fc4..14a713b81 100644 --- a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 00c8544db52244ca8ab9a9e3b5dc104a42cade6c Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 01:01:56 -0700 Subject: [PATCH 20/45] updated wrapper oob with rerank manifests --- .../oob_four_gaudi_with_rerank.yaml | 544 ++++++++---------- .../oob_single_gaudi_with_rerank.yaml | 544 ++++++++---------- .../two_gaudi/oob_two_gaudi_with_rerank.yaml | 544 ++++++++---------- 3 files changed, 678 insertions(+), 954 deletions(-) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index f8684c239..251be842b 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + 
HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +44,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- 
-kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - 
apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +212,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +219,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - 
args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +264,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -340,16 +276,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +287,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + 
matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +359,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +366,34 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +411,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - 
maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +429,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +497,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: 
chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +504,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +549,41 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: 
+ node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -694,24 +596,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -719,9 +629,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +636,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index 
b05326a30..f9e96e24e 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: 
chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +44,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 
1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +212,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +219,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +264,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -340,16 +276,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +287,61 @@ spec: value: all - name: HF_TOKEN 
value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +359,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +366,34 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- 
-kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +411,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +429,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- 
-kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +497,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +504,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +549,41 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: 
TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -694,24 +596,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- 
apiVersion: v1 kind: Service @@ -719,9 +629,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +636,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index 13d834512..f18e60a89 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - 
+kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,33 +44,35 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -107,70 +90,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME 
- valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -188,57 +142,59 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: 
Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -256,16 +212,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -273,32 +219,34 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -316,20 +264,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - 
capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -340,16 +276,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -361,38 +287,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -410,16 +359,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: 
llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -427,32 +366,34 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -470,35 +411,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -512,38 +429,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + 
hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -561,16 +497,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -578,32 +504,34 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: 
Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -621,67 +549,41 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -694,24 +596,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db 
containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -719,9 +629,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -729,6 +636,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From 4e73f56970129d57b4852d54354b4197c68783f8 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 08:02:18 +0000 Subject: [PATCH 21/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml | 3 +++ .../with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml | 3 +++ .../oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml index 251be842b..85866573f 100644 --- a/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/four_gaudi/oob_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml 
b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml index f9e96e24e..eb63aada5 100644 --- a/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/single_gaudi/oob_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml index f18e60a89..1b9bf7ebf 100644 --- a/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/oob/with_rerank/two_gaudi/oob_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From b6f2e131437bbafc605850fd9a5acb930794cd7b Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 04:05:45 -0700 Subject: [PATCH 22/45] updated tuned wrapper manifests --- .../tuned_four_gaudi_with_rerank.yaml | 572 ++++++--------- .../tuned_single_gaudi_with_rerank.yaml | 578 +++++++-------- .../tuned_single_gaudi_with_rerank_v2.yaml | 675 ++++++++++++++++++ .../tuned_two_gaudi_with_rerank.yaml | 580 +++++++-------- 4 files changed, 1393 insertions(+), 1012 deletions(-) create mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 373b46c8a..6bcbae79b 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel 
Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 4 + replicas: 1 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +44,42 @@ 
spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +97,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + 
maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +149,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,38 +169,46 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 
targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -270,16 +226,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +233,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -335,20 +282,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +294,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm 
- ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,38 +305,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -429,16 +377,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +384,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 
4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +433,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,38 +451,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + 
topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -585,16 +519,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,37 +526,38 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -650,74 +575,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - 
node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +626,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - 
envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -755,9 +659,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +666,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 9d2f0ee96..06df06a99 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 
TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +44,42 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# 
SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +97,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +149,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: 
IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,45 +169,53 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: embedding-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-deploy @@ -270,16 +226,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +233,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + 
nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -335,20 +282,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +294,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,45 +305,68 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + 
topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: llm-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: llm-deploy @@ -429,16 +377,6 @@ spec: labels: app: llm-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +384,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +433,11 @@ spec: labels: 
app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,45 +451,64 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: 
Deployment metadata: name: reranking-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: reranking-deploy @@ -585,16 +519,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,44 +526,45 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy @@ -650,74 +575,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT 
- - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +626,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway 
--- apiVersion: v1 kind: Service @@ -755,9 +659,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +666,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml new file mode 100644 index 000000000..dae895f52 --- /dev/null +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml @@ -0,0 +1,675 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + 
- envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + 
sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/embedding-tei:latest + imagePullPolicy: IfNotPresent + name: embedding-deploy + ports: + - containerPort: 6000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + 
whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-svc + namespace: default +spec: + ports: + - name: service + port: 6000 + targetPort: 6000 + selector: + app: embedding-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 7 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: 
llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: llm-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/llm-tgi:latest + imagePullPolicy: IfNotPresent + name: llm-deploy + ports: + - containerPort: 9000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-svc + namespace: default +spec: + ports: + - name: service + port: 9000 + targetPort: 9000 + selector: + app: llm-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: 
model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/reranking-tei:latest + imagePullPolicy: IfNotPresent + name: reranking-deploy + ports: + - containerPort: 8000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-svc + namespace: default +spec: + ports: + - name: service + port: 8000 + targetPort: 8000 + selector: + app: reranking-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: 
retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 4ed98c347..77995706b 100644 --- 
a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - RERANK_MODEL_ID: BAAI/bge-reranker-base + EMBEDDING_SERVICE_HOST_IP: embedding-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVICE_HOST_IP: llm-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVICE_HOST_IP: reranking-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - EMBEDDING_SERVICE_HOST_IP: embedding-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - RERANK_SERVICE_HOST_IP: reranking-svc - NODE_SELECTOR: chatqna-opea - LLM_SERVICE_HOST_IP: llm-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - 
topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,40 +44,42 @@ spec: image: opea/chatqna:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: limits: cpu: 8 - memory: 4000Mi + memory: 8000Mi requests: cpu: 8 - memory: 4000Mi + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -114,70 +97,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - 
containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -195,23 +149,17 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm ports: - containerPort: 80 resources: @@ -221,45 +169,53 @@ spec: requests: cpu: 80 memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: 
shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: embedding-deploy namespace: default spec: - replicas: 2 + replicas: 4 selector: matchLabels: app: embedding-deploy @@ -270,16 +226,6 @@ spec: labels: app: embedding-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: embedding-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -287,37 +233,38 @@ spec: image: opea/embedding-tei:latest imagePullPolicy: IfNotPresent name: embedding-deploy - args: null ports: - containerPort: 6000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-svc namespace: default spec: - type: ClusterIP - selector: - app: embedding-deploy ports: - name: service port: 6000 targetPort: 6000 - - + selector: + app: embedding-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -335,20 +282,8 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: 
ghcr.io/huggingface/tgi-gaudi:2.0.4 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length @@ -359,16 +294,6 @@ spec: - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -380,45 +305,68 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: llm-deploy namespace: default spec: - replicas: 2 + replicas: 4 selector: matchLabels: app: llm-deploy @@ -429,16 +377,6 @@ spec: labels: app: llm-deploy 
spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: llm-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -446,37 +384,38 @@ spec: image: opea/llm-tgi:latest imagePullPolicy: IfNotPresent name: llm-deploy - args: null ports: - containerPort: 9000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-svc namespace: default spec: - type: ClusterIP - selector: - app: llm-deploy ports: - name: service port: 9000 targetPort: 9000 - - + selector: + app: llm-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -494,35 +433,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -536,45 +451,64 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + 
image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: reranking-deploy namespace: default spec: - replicas: 2 + replicas: 4 selector: matchLabels: app: reranking-deploy @@ -585,16 +519,6 @@ spec: labels: app: reranking-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -602,44 +526,45 @@ spec: image: opea/reranking-tei:latest imagePullPolicy: IfNotPresent name: reranking-deploy - args: null ports: - containerPort: 8000 resources: - limits: - cpu: 4 requests: cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: 
reranking-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-svc namespace: default spec: - type: ClusterIP - selector: - app: reranking-deploy ports: - name: service port: 8000 targetPort: 8000 - - + selector: + app: reranking-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 2 + replicas: 4 selector: matchLabels: app: retriever-deploy @@ -650,74 +575,45 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 resources: - limits: - cpu: 8 - memory: 2500Mi requests: - cpu: 8 - memory: 2500Mi + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc namespace: 
default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: apps/v1 kind: Deployment metadata: @@ -730,24 +626,32 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service @@ -755,9 +659,6 @@ metadata: name: vector-db namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -765,6 +666,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From 8c0b60e3e7b740a6cd4ffbf06d850eea821f81f6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 11:07:24 +0000 Subject: [PATCH 23/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml | 3 +++ .../single_gaudi/tuned_single_gaudi_with_rerank.yaml | 3 +++ .../with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml | 3 +++ 3 
files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 6bcbae79b..eeda027b6 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index 06df06a99..ff5ed1c2a 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 77995706b..7f4a9b491 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 32bcba508fd49f033421fcafe9ab632e41a34deb Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 04:25:02 -0700 Subject: [PATCH 24/45] modify the path --- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 0 .../no_wrapper_single_gaudi_with_rerank.yaml | 0 
.../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 0 .../four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml | 0 .../no_wrapper_single_gaudi_without_rerank.yaml | 0 .../two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml | 0 .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 0 .../no_wrapper_single_gaudi_with_rerank.yaml | 0 .../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 9 ++++++++- .../four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml | 0 .../no_wrapper_single_gaudi_without_rerank.yaml | 0 .../two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml | 0 12 files changed, 8 insertions(+), 1 deletion(-) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper => oob_no_wrapper}/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper/tuned => tuned_no_wrapper}/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper/tuned => tuned_no_wrapper}/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper/tuned => tuned_no_wrapper}/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml (98%) rename ChatQnA/benchmark/{no_wrapper/tuned => tuned_no_wrapper}/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper/tuned => 
tuned_no_wrapper}/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml (100%) rename ChatQnA/benchmark/{no_wrapper/tuned => tuned_no_wrapper}/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml (100%) diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml diff 
--git a/ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/tuned/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/tuned/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml 
b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml similarity index 98% rename from ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 87b4fa548..6454119a3 100644 --- a/ChatQnA/benchmark/no_wrapper/tuned/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -35,7 +35,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy @@ -66,6 +66,13 @@ spec: args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi serviceAccountName: default --- kind: Service diff --git a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/tuned/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/tuned/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml diff --git 
a/ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/no_wrapper/tuned/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml From 06e3ab06cb501538baac777e939f310ce97c0387 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 05:11:18 -0700 Subject: [PATCH 25/45] updated tuned wrapper manifests --- .../tuned_four_gaudi_with_rerank.yaml | 2 +- .../tuned_single_gaudi_with_rerank.yaml | 2 +- .../tuned_single_gaudi_with_rerank_v2.yaml | 675 ------------------ .../tuned_two_gaudi_with_rerank.yaml | 2 +- 4 files changed, 3 insertions(+), 678 deletions(-) delete mode 100644 ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index eeda027b6..ddb1b7fc5 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -29,7 +29,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: chatqna-backend-server-deploy diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index ff5ed1c2a..f0d80781f 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -29,7 +29,7 @@ metadata: name: 
chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml deleted file mode 100644 index dae895f52..000000000 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank_v2.yaml +++ /dev/null @@ -1,675 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -apiVersion: v1 -data: - EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 - EMBEDDING_SERVICE_HOST_IP: embedding-svc - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - INDEX_NAME: rag-redis - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 - LLM_SERVICE_HOST_IP: llm-svc - NODE_SELECTOR: chatqna-opea - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - RERANK_MODEL_ID: BAAI/bge-reranker-base - RERANK_SERVICE_HOST_IP: reranking-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 - TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 - TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 -kind: ConfigMap -metadata: - name: qna-config - namespace: default ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: chatqna-backend-server-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: chatqna-backend-server-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: chatqna-backend-server-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/chatqna:latest - imagePullPolicy: IfNotPresent - name: chatqna-backend-server-deploy - ports: - - containerPort: 8888 - 
resources: - limits: - cpu: 8 - memory: 8000Mi - requests: - cpu: 8 - memory: 8000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: chatqna-backend-server-svc - namespace: default -spec: - ports: - - name: service - nodePort: 30888 - port: 8888 - targetPort: 8888 - selector: - app: chatqna-backend-server-deploy - type: NodePort ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: dataprep-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: dataprep-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: dataprep-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/dataprep-redis:latest - imagePullPolicy: IfNotPresent - name: dataprep-deploy - ports: - - containerPort: 6007 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: dataprep-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: dataprep-svc - namespace: default -spec: - ports: - - name: port1 - port: 6007 - targetPort: 6007 - selector: - app: dataprep-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate - 
envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 - imagePullPolicy: IfNotPresent - name: embedding-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - cpu: 80 - memory: 20000Mi - requests: - cpu: 80 - memory: 20000Mi - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 6006 - targetPort: 80 - selector: - app: embedding-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: embedding-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: embedding-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: embedding-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/embedding-tei:latest - imagePullPolicy: IfNotPresent - name: embedding-deploy - ports: - - containerPort: 6000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: embedding-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: embedding-svc - namespace: default -spec: - ports: - - name: service - port: 6000 - targetPort: 
6000 - selector: - app: embedding-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-dependency-deploy - namespace: default -spec: - replicas: 7 - selector: - matchLabels: - app: llm-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(LLM_MODEL_ID) - - --max-input-length - - '1024' - - --max-total-tokens - - '2048' - - --max-batch-total-tokens - - '65536' - - --max-batch-prefill-tokens - - '4096' - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.4 - imagePullPolicy: IfNotPresent - name: llm-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - securityContext: - capabilities: - add: - - SYS_NICE - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 9009 - targetPort: 80 - selector: - app: llm-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: llm-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: 
llm-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: llm-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/llm-tgi:latest - imagePullPolicy: IfNotPresent - name: llm-deploy - ports: - - containerPort: 9000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: llm-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: llm-svc - namespace: default -spec: - ports: - - name: service - port: 9000 - targetPort: 9000 - selector: - app: llm-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - containers: - - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - imagePullPolicy: IfNotPresent - name: reranking-dependency-deploy - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - 
matchLabels: - app: reranking-dependency-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - volumes: - - hostPath: - path: /mnt/models - type: Directory - name: model-volume - - emptyDir: - medium: Memory - sizeLimit: 1Gi - name: shm ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-dependency-svc - namespace: default -spec: - ports: - - name: service - port: 8808 - targetPort: 80 - selector: - app: reranking-dependency-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/reranking-tei:latest - imagePullPolicy: IfNotPresent - name: reranking-deploy - ports: - - containerPort: 8000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: reranking-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: reranking-svc - namespace: default -spec: - ports: - - name: service - port: 8000 - targetPort: 8000 - selector: - app: reranking-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: retriever-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: retriever-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: retriever-deploy - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/retriever-redis:latest - imagePullPolicy: IfNotPresent - name: retriever-deploy - ports: - - 
containerPort: 7000 - resources: - requests: - cpu: 4 - memory: 4000Mi - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: retriever-deploy - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: retriever-svc - namespace: default -spec: - ports: - - name: service - port: 7000 - targetPort: 7000 - selector: - app: retriever-deploy - type: ClusterIP ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: vector-db - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: vector-db - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: vector-db - spec: - containers: - - envFrom: - - configMapRef: - name: qna-config - image: redis/redis-stack:7.2.0-v9 - imagePullPolicy: IfNotPresent - name: vector-db - ports: - - containerPort: 6379 - - containerPort: 8001 - hostIPC: true - nodeSelector: - node-type: chatqna-opea - serviceAccountName: default - topologySpreadConstraints: - - labelSelector: - matchLabels: - app: vector-db - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway ---- -apiVersion: v1 -kind: Service -metadata: - name: vector-db - namespace: default -spec: - ports: - - name: vector-db-service - port: 6379 - targetPort: 6379 - - name: vector-db-insight - port: 8001 - targetPort: 8001 - selector: - app: vector-db - type: ClusterIP ---- diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 7f4a9b491..575337bb3 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -29,7 +29,7 @@ metadata: name: 
chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: chatqna-backend-server-deploy From 24bc0eb1b8b55535bc1aaad534a6282638e6e5b4 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 05:29:35 -0700 Subject: [PATCH 26/45] updated tuned wrapper manifests --- .../four_gaudi/tuned_four_gaudi_with_rerank.yaml | 3 --- .../tuned_single_gaudi_with_rerank.yaml | 13 +++++-------- .../two_gaudi/tuned_two_gaudi_with_rerank.yaml | 13 +++++-------- 3 files changed, 10 insertions(+), 19 deletions(-) diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index ddb1b7fc5..501e5de57 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index f0d80781f..d7a42892b 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 @@ -29,7 +26,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: chatqna-backend-server-deploy @@ -218,7 +215,7 @@ metadata: name: embedding-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: embedding-deploy @@ 
-369,7 +366,7 @@ metadata: name: llm-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: llm-deploy @@ -511,7 +508,7 @@ metadata: name: reranking-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: reranking-deploy @@ -567,7 +564,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 2 + replicas: 1 selector: matchLabels: app: retriever-deploy diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 575337bb3..258c665e4 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -1,6 +1,3 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 @@ -29,7 +26,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 4 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy @@ -218,7 +215,7 @@ metadata: name: embedding-deploy namespace: default spec: - replicas: 4 + replicas: 2 selector: matchLabels: app: embedding-deploy @@ -369,7 +366,7 @@ metadata: name: llm-deploy namespace: default spec: - replicas: 4 + replicas: 2 selector: matchLabels: app: llm-deploy @@ -511,7 +508,7 @@ metadata: name: reranking-deploy namespace: default spec: - replicas: 4 + replicas: 2 selector: matchLabels: app: reranking-deploy @@ -567,7 +564,7 @@ metadata: name: retriever-deploy namespace: default spec: - replicas: 4 + replicas: 2 selector: matchLabels: app: retriever-deploy From 97a3d197c9638852743ff421236cab3174137d85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 12 Sep 2024 12:31:10 +0000 Subject: [PATCH 27/45] [pre-commit.ci] auto fixes from 
pre-commit.com hooks for more information, see https://pre-commit.ci --- .../with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml | 3 +++ .../single_gaudi/tuned_single_gaudi_with_rerank.yaml | 3 +++ .../with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml index 501e5de57..ddb1b7fc5 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/four_gaudi/tuned_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml index d7a42892b..dae895f52 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/single_gaudi/tuned_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml index 258c665e4..ee10361c7 100644 --- a/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned/with_rerank/two_gaudi/tuned_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 36b88d365683d4c31ce6c67e5c7d5041f7b637c3 Mon Sep 
17 00:00:00 2001 From: Spycsh Date: Fri, 13 Sep 2024 02:13:00 +0000 Subject: [PATCH 28/45] try fix ulimit --- ChatQnA/Dockerfile.no_wrapper | 2 +- ...pper_without_rerank => Dockerfile.no_wrapper_without_rerank} | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) rename ChatQnA/{Dockerfile_no_wrapper_without_rerank => Dockerfile.no_wrapper_without_rerank} (93%) diff --git a/ChatQnA/Dockerfile.no_wrapper b/ChatQnA/Dockerfile.no_wrapper index 2cd260122..73ec06ac3 100644 --- a/ChatQnA/Dockerfile.no_wrapper +++ b/ChatQnA/Dockerfile.no_wrapper @@ -31,6 +31,6 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 999999' >> ~/.bashrc +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc && . ~/.bashrc ENTRYPOINT ["python", "chatqna_no_wrapper.py"] diff --git a/ChatQnA/Dockerfile_no_wrapper_without_rerank b/ChatQnA/Dockerfile.no_wrapper_without_rerank similarity index 93% rename from ChatQnA/Dockerfile_no_wrapper_without_rerank rename to ChatQnA/Dockerfile.no_wrapper_without_rerank index 0d6c1e34e..7fc2847f4 100644 --- a/ChatQnA/Dockerfile_no_wrapper_without_rerank +++ b/ChatQnA/Dockerfile.no_wrapper_without_rerank @@ -31,4 +31,6 @@ USER user WORKDIR /home/user +RUN echo 'ulimit -S -n 999999' >> ~/.bashrc && . ~/.bashrc + ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"] From 25ff722695cd216a5c7d8affbb9eb67e137ca614 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Fri, 13 Sep 2024 03:20:52 +0000 Subject: [PATCH 29/45] lower ulimit n --- ChatQnA/Dockerfile.no_wrapper | 2 +- ChatQnA/Dockerfile.no_wrapper_without_rerank | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ChatQnA/Dockerfile.no_wrapper b/ChatQnA/Dockerfile.no_wrapper index 73ec06ac3..ed6590b90 100644 --- a/ChatQnA/Dockerfile.no_wrapper +++ b/ChatQnA/Dockerfile.no_wrapper @@ -31,6 +31,6 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 999999' >> ~/.bashrc && . ~/.bashrc +RUN echo 'ulimit -S -n 14999' >> ~/.bashrc && . 
~/.bashrc ENTRYPOINT ["python", "chatqna_no_wrapper.py"] diff --git a/ChatQnA/Dockerfile.no_wrapper_without_rerank b/ChatQnA/Dockerfile.no_wrapper_without_rerank index 7fc2847f4..2675e6c63 100644 --- a/ChatQnA/Dockerfile.no_wrapper_without_rerank +++ b/ChatQnA/Dockerfile.no_wrapper_without_rerank @@ -31,6 +31,6 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 999999' >> ~/.bashrc && . ~/.bashrc +RUN echo 'ulimit -S -n 14999' >> ~/.bashrc && . ~/.bashrc ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"] From f343c3ea313fc67b1538509d4cda18b6c80b689e Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 23:19:55 -0700 Subject: [PATCH 30/45] updated tuned_no_wrapper --- .../no_wrapper_four_gaudi_with_rerank.yaml | 454 +++++++++--------- .../no_wrapper_single_gaudi_with_rerank.yaml | 448 ++++++++--------- .../no_wrapper_two_gaudi_with_rerank.yaml | 445 ++++++++--------- 3 files changed, 634 insertions(+), 713 deletions(-) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index a6c5363f2..bd313f696 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: 
Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,42 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: 
chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,73 +97,48 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment 
metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy @@ -180,53 +149,66 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - 
args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,34 +249,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -334,35 +321,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - 
- envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -376,41 +339,64 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: retriever-deploy @@ -421,67 +407,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - 
labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +458,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + 
node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +498,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index 760f41845..14c72e797 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 
TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,42 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +97,41 @@ spec: labels: app: 
dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,53 +149,66 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + 
requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -244,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,34 +249,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - 
containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -334,35 +321,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -376,34 +339,57 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - 
containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -421,67 +407,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea 
serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +458,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +498,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 6454119a3..2ee3be125 100644 --- 
a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,34 +1,25 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - 
maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,7 +44,6 @@ spec: image: opea/chatqna-no-wrapper:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 resources: @@ -73,26 +53,33 @@ spec: requests: cpu: 8 memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -110,73 +97,48 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea 
serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy @@ -187,53 +149,66 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: 
embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -251,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1024' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -296,34 +249,61 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service 
apiVersion: v1 +kind: Service metadata: name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: llm-dependency-deploy ports: - name: service port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -341,35 +321,11 @@ spec: labels: app: reranking-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy containers: - - envFrom: - - configMapRef: - name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: + - args: - --model-id - $(RERANK_MODEL_ID) - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -383,41 +339,64 @@ spec: value: ${HF_TOKEN} - name: MAX_WARMUP_SEQUENCE_LENGTH value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: 
reranking-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service port: 8808 targetPort: 80 - - + selector: + app: reranking-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy @@ -428,67 +407,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: 
replicas: 1 selector: @@ -496,33 +458,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -530,6 +498,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From 33a69b5429701a267b26c907d40ff9315deb9d90 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 06:20:44 +0000 Subject: [PATCH 31/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 3 +++ .../single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml | 3 +++ .../two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index bd313f696..66d178648 100644 --- 
a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index 14c72e797..018a0a564 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index 2ee3be125..b755b658e 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 188c9849e0013bbe63897a45d4a68ff293d6e55b Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Thu, 12 Sep 2024 23:32:21 -0700 Subject: [PATCH 32/45] added eight_gtuned_no_wrapper with rerank eudi folder --- ...wrapper_tuned_eight_gaudi_with_rerank.yaml | 504 ++++++++++++++++++ 1 file changed, 504 insertions(+) create mode 100644 
ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml new file mode 100644 index 000000000..88d0599c0 --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml @@ -0,0 +1,504 @@ +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + 
memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: 
qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 63 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1024' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + 
habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: 
reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 8 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + 
hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- From 4052264d3f9c533f8e1ac06b567350ae9843810c Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 06:33:18 +0000 Subject: [PATCH 33/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml index 88d0599c0..dcc85c37d 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 220c4e55072f9020d20f30b684f6524c0eb589b8 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 00:39:49 -0700 Subject: [PATCH 34/45] updated tuned_no_wrapper with rerank 1024 -> 1280 --- .../eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml | 2 +- .../four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml | 2 +- .../single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml | 2 +- 
.../with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml index dcc85c37d..c541964e9 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_with_rerank.yaml @@ -234,7 +234,7 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' + - '1280' - --max-total-tokens - '2048' - --max-batch-total-tokens diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml index 66d178648..a4cf76e74 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml @@ -234,7 +234,7 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' + - '1280' - --max-total-tokens - '2048' - --max-batch-total-tokens diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml index 018a0a564..0f37c9b50 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml @@ -234,7 +234,7 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' + - '1280' - --max-total-tokens - '2048' - --max-batch-total-tokens diff --git 
a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml index b755b658e..9fbb3fa06 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml @@ -234,7 +234,7 @@ spec: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '1024' + - '1280' - --max-total-tokens - '2048' - --max-batch-total-tokens From 7665b605b94e69d15c151a38fe0253eea616e197 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Fri, 13 Sep 2024 08:12:31 +0000 Subject: [PATCH 35/45] revert useless ulimit --- ChatQnA/Dockerfile.no_wrapper | 2 -- ChatQnA/Dockerfile.no_wrapper_without_rerank | 2 -- 2 files changed, 4 deletions(-) diff --git a/ChatQnA/Dockerfile.no_wrapper b/ChatQnA/Dockerfile.no_wrapper index ed6590b90..c6adacaee 100644 --- a/ChatQnA/Dockerfile.no_wrapper +++ b/ChatQnA/Dockerfile.no_wrapper @@ -31,6 +31,4 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 14999' >> ~/.bashrc && . ~/.bashrc - ENTRYPOINT ["python", "chatqna_no_wrapper.py"] diff --git a/ChatQnA/Dockerfile.no_wrapper_without_rerank b/ChatQnA/Dockerfile.no_wrapper_without_rerank index 2675e6c63..0d6c1e34e 100644 --- a/ChatQnA/Dockerfile.no_wrapper_without_rerank +++ b/ChatQnA/Dockerfile.no_wrapper_without_rerank @@ -31,6 +31,4 @@ USER user WORKDIR /home/user -RUN echo 'ulimit -S -n 14999' >> ~/.bashrc && . 
~/.bashrc - ENTRYPOINT ["python", "chatqna_no_wrapper.py", "--without-rerank"] From bbd39c8502800d5d9dc2da9abbdaaa35c1014089 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 01:42:02 -0700 Subject: [PATCH 36/45] renamed --- ...h_rerank.yaml => no_wrapper_oob_four_gaudi_with_rerank.yaml} | 0 ...rerank.yaml => no_wrapper_oob_single_gaudi_with_rerank.yaml} | 0 ...th_rerank.yaml => no_wrapper_oob_two_gaudi_with_rerank.yaml} | 0 ...erank.yaml => no_wrapper_oob_four_gaudi_without_rerank.yaml} | 0 ...ank.yaml => no_wrapper_oob_single_gaudi_without_rerank.yaml} | 0 ...rerank.yaml => no_wrapper_oob_two_gaudi_without_rerank.yaml} | 0 ...rerank.yaml => no_wrapper_tuned_four_gaudi_with_rerank.yaml} | 0 ...rank.yaml => no_wrapper_tuned_single_gaudi_with_rerank.yaml} | 2 +- ..._rerank.yaml => no_wrapper_tuned_two_gaudi_with_rerank.yaml} | 0 ...ank.yaml => no_wrapper_tuned_four_gaudi_without_rerank.yaml} | 0 ...k.yaml => no_wrapper_tuned_single_gaudi_without_rerank.yaml} | 0 ...rank.yaml => no_wrapper_tuned_two_gaudi_without_rerank.yaml} | 0 12 files changed, 1 insertion(+), 1 deletion(-) rename ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/{no_wrapper_four_gaudi_with_rerank.yaml => no_wrapper_oob_four_gaudi_with_rerank.yaml} (100%) rename ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/{no_wrapper_single_gaudi_with_rerank.yaml => no_wrapper_oob_single_gaudi_with_rerank.yaml} (100%) rename ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/{no_wrapper_two_gaudi_with_rerank.yaml => no_wrapper_oob_two_gaudi_with_rerank.yaml} (100%) rename ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/{no_wrapper_four_gaudi_without_rerank.yaml => no_wrapper_oob_four_gaudi_without_rerank.yaml} (100%) rename ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/{no_wrapper_single_gaudi_without_rerank.yaml => no_wrapper_oob_single_gaudi_without_rerank.yaml} (100%) rename 
ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/{no_wrapper_two_gaudi_without_rerank.yaml => no_wrapper_oob_two_gaudi_without_rerank.yaml} (100%) rename ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/{no_wrapper_four_gaudi_with_rerank.yaml => no_wrapper_tuned_four_gaudi_with_rerank.yaml} (100%) rename ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/{no_wrapper_single_gaudi_with_rerank.yaml => no_wrapper_tuned_single_gaudi_with_rerank.yaml} (99%) rename ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/{no_wrapper_two_gaudi_with_rerank.yaml => no_wrapper_tuned_two_gaudi_with_rerank.yaml} (100%) rename ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/{no_wrapper_four_gaudi_without_rerank.yaml => no_wrapper_tuned_four_gaudi_without_rerank.yaml} (100%) rename ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/{no_wrapper_single_gaudi_without_rerank.yaml => no_wrapper_tuned_single_gaudi_without_rerank.yaml} (100%) rename ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/{no_wrapper_two_gaudi_without_rerank.yaml => no_wrapper_tuned_two_gaudi_without_rerank.yaml} (100%) diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/with_rerank/four_gaudi/no_wrapper_oob_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml rename to 
ChatQnA/benchmark/oob_no_wrapper/with_rerank/single_gaudi/no_wrapper_oob_single_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/with_rerank/two_gaudi/no_wrapper_oob_two_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml diff --git 
a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_four_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml similarity index 99% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml index 0f37c9b50..9b64b1fbb 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_single_gaudi_with_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_with_rerank.yaml @@ -29,7 +29,7 @@ metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy diff --git a/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_two_gaudi_with_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/with_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_with_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_four_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_single_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml similarity index 100% rename from ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_two_gaudi_without_rerank.yaml rename to ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml From aa4fdf98a01271d34baa1650c7b320b91e162a31 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 01:48:05 -0700 Subject: [PATCH 37/45] renamed --- ChatQnA/benchmark/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/benchmark/README.md b/ChatQnA/benchmark/README.md index b666e8ce4..78aea8d7c 100644 --- a/ChatQnA/benchmark/README.md +++ b/ChatQnA/benchmark/README.md @@ -37,6 +37,7 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c - Set up kubectl on the master node with access to the Kubernetes cluster. - Install Python 3.8+ on the master node for running the stress tool. 
- Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. +- Ensure that the container's ulimit can meet the number of requests. ### Kubernetes Cluster Example From f82a0606a5cca38ba05a0de9a6fa9e6cef6789df Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 02:05:28 -0700 Subject: [PATCH 38/45] updated tuned_no_wrapper without rerank --- ...apper_tuned_four_gaudi_without_rerank.yaml | 460 +++++++----------- ...per_tuned_single_gaudi_without_rerank.yaml | 458 +++++++---------- ...rapper_tuned_two_gaudi_without_rerank.yaml | 460 +++++++----------- 3 files changed, 524 insertions(+), 854 deletions(-) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml index 02111fb3a..81dca1de1 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT:
http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,42 @@ spec: image: opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: 
NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,73 +97,48 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: embedding-dependency-deploy @@ -180,60 +149,73 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: 
- node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 31 + replicas: 32 selector: matchLabels: app: llm-dependency-deploy @@ -244,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1280' - --max-total-tokens - - 
'4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,128 +249,68 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: 
HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 4 selector: matchLabels: app: retriever-deploy @@ -421,67 +321,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - 
configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +372,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +412,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: 
ClusterIP --- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml index 7d8611884..a95121c14 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # 
LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,42 @@ spec: image: opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,66 +97,41 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - 
configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -180,60 +149,73 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: 
embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 7 + replicas: 8 selector: matchLabels: app: llm-dependency-deploy @@ -244,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1280' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,128 +249,68 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - 
targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - configMapRef: name: qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 
+kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy @@ -421,67 +321,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 
kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +372,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +412,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml index a32396145..dd8a9f0ed 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml @@ -1,41 +1,32 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - apiVersion: v1 -kind: ConfigMap -metadata: - name: qna-config - namespace: default data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: 
embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 RERANK_MODEL_ID: BAAI/bge-reranker-base - LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 # meta-llama/Meta-Llama-3-8B-Instruct + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 - REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 - INDEX_NAME: rag-redis - HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} - # EMBEDDING_SERVICE_HOST_IP: embedding-svc - EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc - RETRIEVER_SERVICE_HOST_IP: retriever-svc - # RERANK_SERVICE_HOST_IP: reranking-svc - RERANK_SERVER_HOST_IP: reranking-dependency-svc - NODE_SELECTOR: chatqna-opea - # LLM_SERVICE_HOST_IP: llm-svc - LLM_SERVER_HOST_IP: llm-dependency-svc - - +kind: ConfigMap +metadata: + name: qna-config + namespace: default --- - - apiVersion: apps/v1 kind: Deployment metadata: name: chatqna-backend-server-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: chatqna-backend-server-deploy @@ -46,16 +37,6 @@ spec: labels: app: chatqna-backend-server-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: chatqna-backend-server-deploy - hostIPC: true containers: - envFrom: - configMapRef: @@ -63,29 +44,42 @@ spec: image: opea/chatqna-no-wrapper-without-rerank:latest imagePullPolicy: 
IfNotPresent name: chatqna-backend-server-deploy - args: null ports: - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: chatqna-backend-server-svc + namespace: default spec: - type: NodePort - selector: - app: chatqna-backend-server-deploy ports: - name: service + nodePort: 30888 port: 8888 targetPort: 8888 - nodePort: 30888 - - + selector: + app: chatqna-backend-server-deploy + type: NodePort --- - - apiVersion: apps/v1 kind: Deployment metadata: @@ -103,73 +97,48 @@ spec: labels: app: dataprep-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: dataprep-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/dataprep-redis:latest imagePullPolicy: IfNotPresent name: dataprep-deploy - args: null ports: - containerPort: 6007 - - containerPort: 6008 - - containerPort: 6009 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: 
dataprep-svc + namespace: default spec: - type: ClusterIP - selector: - app: dataprep-deploy ports: - name: port1 port: 6007 targetPort: 6007 - - name: port2 - port: 6008 - targetPort: 6008 - - name: port3 - port: 6009 - targetPort: 6009 - - + selector: + app: dataprep-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: embedding-dependency-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: embedding-dependency-deploy @@ -180,60 +149,73 @@ spec: labels: app: embedding-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea containers: - - envFrom: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: - configMapRef: name: qna-config image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent name: embedding-dependency-deploy - args: - - --model-id - - $(EMBEDDING_MODEL_ID) - - --auto-truncate + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi volumeMounts: - mountPath: /data name: model-volume - mountPath: /dev/shm name: shm - ports: - - containerPort: 80 + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: name: embedding-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: embedding-dependency-deploy ports: - name: service port: 6006 targetPort: 80 - - + selector: + app: embedding-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment 
metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 15 + replicas: 16 selector: matchLabels: app: llm-dependency-deploy @@ -244,40 +226,18 @@ spec: labels: app: llm-dependency-deploy spec: - nodeSelector: - node-type: chatqna-opea - hostIPC: true containers: - - envFrom: - - configMapRef: - name: qna-config - image: ghcr.io/huggingface/tgi-gaudi:2.0.1 - name: llm-dependency-deploy-demo - securityContext: - capabilities: - add: - - SYS_NICE - args: + - args: - --model-id - $(LLM_MODEL_ID) - --max-input-length - - '2048' + - '1280' - --max-total-tokens - - '4096' + - '2048' - --max-batch-total-tokens - '65536' - --max-batch-prefill-tokens - '4096' - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm - ports: - - containerPort: 80 - resources: - limits: - habana.ai/gaudi: 1 env: - name: OMPI_MCA_btl_vader_single_copy_mechanism value: none @@ -289,128 +249,68 @@ spec: value: all - name: HF_TOKEN value: ${HF_TOKEN} - serviceAccountName: default - volumes: - - name: model-volume - hostPath: - path: /mnt/models - type: Directory - - name: shm - emptyDir: - medium: Memory - sizeLimit: 1Gi ---- -kind: Service -apiVersion: v1 -metadata: - name: llm-dependency-svc -spec: - type: ClusterIP - selector: - app: llm-dependency-deploy - ports: - - name: service - port: 9009 - targetPort: 80 - - ---- - - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: reranking-dependency-deploy - namespace: default -spec: - replicas: 1 - selector: - matchLabels: - app: reranking-dependency-deploy - template: - metadata: - annotations: - sidecar.istio.io/rewriteAppHTTPProbers: 'true' - labels: - app: reranking-dependency-deploy - spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: reranking-dependency-deploy - containers: - - envFrom: + envFrom: - configMapRef: name: 
qna-config - image: opea/tei-gaudi:latest - name: reranking-dependency-deploy - args: - - --model-id - - $(RERANK_MODEL_ID) - - --auto-truncate - volumeMounts: - - mountPath: /data - name: model-volume - - mountPath: /dev/shm - name: shm + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy ports: - containerPort: 80 resources: limits: habana.ai/gaudi: 1 - env: - - name: OMPI_MCA_btl_vader_single_copy_mechanism - value: none - - name: PT_HPU_ENABLE_LAZY_COLLECTIVES - value: 'true' - - name: runtime - value: habana - - name: HABANA_VISIBLE_DEVICES - value: all - - name: HF_TOKEN - value: ${HF_TOKEN} - - name: MAX_WARMUP_SEQUENCE_LENGTH - value: '512' + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway volumes: - - name: model-volume - hostPath: + - hostPath: path: /mnt/models type: Directory - - name: shm - emptyDir: + name: model-volume + - emptyDir: medium: Memory sizeLimit: 1Gi + name: shm --- -kind: Service apiVersion: v1 +kind: Service metadata: - name: reranking-dependency-svc + name: llm-dependency-svc + namespace: default spec: - type: ClusterIP - selector: - app: reranking-dependency-deploy ports: - name: service - port: 8808 + port: 9009 targetPort: 80 - - + selector: + app: llm-dependency-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: retriever-deploy namespace: default spec: - replicas: 1 + replicas: 2 selector: matchLabels: app: retriever-deploy @@ -421,67 +321,50 @@ spec: labels: app: retriever-deploy spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: 
kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: retriever-deploy - hostIPC: true containers: - - env: - - name: REDIS_URL - valueFrom: - configMapKeyRef: - name: qna-config - key: REDIS_URL - - name: TEI_EMBEDDING_ENDPOINT - valueFrom: - configMapKeyRef: - name: qna-config - key: TEI_EMBEDDING_ENDPOINT - - name: HUGGINGFACEHUB_API_TOKEN - valueFrom: - configMapKeyRef: - name: qna-config - key: HUGGINGFACEHUB_API_TOKEN - - name: INDEX_NAME - valueFrom: - configMapKeyRef: - name: qna-config - key: INDEX_NAME + - envFrom: + - configMapRef: + name: qna-config image: opea/retriever-redis:latest imagePullPolicy: IfNotPresent name: retriever-deploy - args: null ports: - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- -kind: Service apiVersion: v1 +kind: Service metadata: name: retriever-svc + namespace: default spec: - type: ClusterIP - selector: - app: retriever-deploy ports: - name: service port: 7000 targetPort: 7000 - - + selector: + app: retriever-deploy + type: ClusterIP --- - - apiVersion: apps/v1 kind: Deployment metadata: name: vector-db + namespace: default spec: replicas: 1 selector: @@ -489,33 +372,39 @@ spec: app: vector-db template: metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' labels: app: vector-db spec: - nodeSelector: - node-type: chatqna-opea - topologySpreadConstraints: - - maxSkew: 1 - topologyKey: kubernetes.io/hostname - whenUnsatisfiable: ScheduleAnyway - labelSelector: - matchLabels: - app: vector-db containers: - - name: vector-db + - envFrom: + - configMapRef: + name: qna-config image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db ports: - containerPort: 
6379 - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway --- apiVersion: v1 kind: Service metadata: name: vector-db + namespace: default spec: - type: ClusterIP - selector: - app: vector-db ports: - name: vector-db-service port: 6379 @@ -523,6 +412,7 @@ spec: - name: vector-db-insight port: 8001 targetPort: 8001 - - + selector: + app: vector-db + type: ClusterIP --- From bb9e80b443baf3a87cbc5063c7bf5f3bb196080b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 09:05:49 +0000 Subject: [PATCH 39/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml | 3 +++ .../no_wrapper_tuned_single_gaudi_without_rerank.yaml | 3 +++ .../two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml | 3 +++ 3 files changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml index 81dca1de1..9c74e60dd 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/four_gaudi/no_wrapper_tuned_four_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml index a95121c14..06c7321a5 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/single_gaudi/no_wrapper_tuned_single_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml index dd8a9f0ed..7505e2a03 100644 --- a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/two_gaudi/no_wrapper_tuned_two_gaudi_without_rerank.yaml @@ -1,3 +1,6 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + apiVersion: v1 data: EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 From 6bff80c90c6521d056619519787c831bd3426dd8 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Fri, 13 Sep 2024 09:49:04 +0000 Subject: [PATCH 40/45] fix norerank bug --- ChatQnA/chatqna_no_wrapper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ChatQnA/chatqna_no_wrapper.py b/ChatQnA/chatqna_no_wrapper.py index 5d5560357..2780c7486 100644 --- a/ChatQnA/chatqna_no_wrapper.py +++ b/ChatQnA/chatqna_no_wrapper.py @@ -96,7 +96,7 @@ def align_outputs(self, data, cur_node, inputs, runtime_graph, llm_parameters_di next_data["texts"] = [doc["text"] for doc in data["retrieved_docs"]] else: # forward to llm - if not docs: + if not docs and with_rerank: # delete the rerank from retriever -> rerank -> llm for ds in reversed(runtime_graph.downstream(cur_node)): for nds in 
runtime_graph.downstream(ds): From d7aebe86b75c45a12484babaaba87cf57d6f5a11 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 04:12:02 -0700 Subject: [PATCH 41/45] update readme.md --- ChatQnA/benchmark/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/ChatQnA/benchmark/README.md b/ChatQnA/benchmark/README.md index 78aea8d7c..fe45da085 100644 --- a/ChatQnA/benchmark/README.md +++ b/ChatQnA/benchmark/README.md @@ -38,6 +38,15 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c - Install Python 3.8+ on the master node for running the stress tool. - Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. - Ensure that the container's ulimit can meet the the number of requests. +```bash +# The way to modify the containered ulimit: +sudo systemctl edit containerd +# Add two lines: +[Service] +LimitNOFILE=65536:1048576 + +sudo systemctl daemon-reload; sudo systemctl restart containerd +``` ### Kubernetes Cluster Example From 89846bb0b1f1d4acb9964aaab6c7791bd9334bab Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Sep 2024 11:12:29 +0000 Subject: [PATCH 42/45] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ChatQnA/benchmark/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChatQnA/benchmark/README.md b/ChatQnA/benchmark/README.md index fe45da085..767b4999b 100644 --- a/ChatQnA/benchmark/README.md +++ b/ChatQnA/benchmark/README.md @@ -38,6 +38,7 @@ Results will be displayed in the terminal and saved as CSV file named `1_stats.c - Install Python 3.8+ on the master node for running the stress tool. - Ensure all nodes have a local /mnt/models folder, which will be mounted by the pods. - Ensure that the container's ulimit can meet the the number of requests. 
+ ```bash # The way to modify the containered ulimit: sudo systemctl edit containerd From d1e2c354025472a8ff82f30daec68b15928ca145 Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 21:22:56 -0700 Subject: [PATCH 43/45] added oob no wrapper manifests --- ...o_wrapper_oob_eight_gaudi_with_rerank.yaml | 489 ++++++++++++++++++ ...rapper_oob_eight_gaudi_without_rerank.yaml | 403 +++++++++++++++ ...wrapper_oob_four_gaudi_without_rerank.yaml | 2 +- ...apper_oob_single_gaudi_without_rerank.yaml | 2 +- ..._wrapper_oob_two_gaudi_without_rerank.yaml | 2 +- 5 files changed, 895 insertions(+), 3 deletions(-) create mode 100644 ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml create mode 100644 ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml new file mode 100644 index 000000000..0e8ab7ff2 --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/with_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_with_rerank.yaml @@ -0,0 +1,489 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: 
http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: 
+ matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 63 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: 
llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: reranking-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: reranking-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: reranking-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(RERANK_MODEL_ID) + - --auto-truncate + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: 
PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + - name: MAX_WARMUP_SEQUENCE_LENGTH + value: '512' + envFrom: + - configMapRef: + name: qna-config + image: opea/tei-gaudi:latest + imagePullPolicy: IfNotPresent + name: reranking-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: reranking-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: reranking-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 8808 + targetPort: 80 + selector: + app: reranking-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: 
retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml new file mode 100644 index 000000000..0d7c3388c --- /dev/null +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/eight_gaudi/no_wrapper_oob_eight_gaudi_without_rerank.yaml @@ -0,0 +1,403 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: 
redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - 
envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: embedding-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: 
ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 64 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '2048' + - --max-total-tokens + - '4096' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + 
annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml 
b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml index e12e0d44d..1a8ff4992 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/four_gaudi/no_wrapper_oob_four_gaudi_without_rerank.yaml @@ -204,7 +204,7 @@ metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 31 + replicas: 32 selector: matchLabels: app: llm-dependency-deploy diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml index a5529c1a2..4b5e034ae 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/single_gaudi/no_wrapper_oob_single_gaudi_without_rerank.yaml @@ -204,7 +204,7 @@ metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 7 + replicas: 8 selector: matchLabels: app: llm-dependency-deploy diff --git a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml index 14a713b81..16e3020f1 100644 --- a/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml +++ b/ChatQnA/benchmark/oob_no_wrapper/without_rerank/two_gaudi/no_wrapper_oob_two_gaudi_without_rerank.yaml @@ -204,7 +204,7 @@ metadata: name: llm-dependency-deploy namespace: default spec: - replicas: 15 + replicas: 16 selector: matchLabels: app: llm-dependency-deploy From a9f7c8ab9a458d1dbd816ed9849d909e2c8a551d Mon Sep 17 00:00:00 2001 From: Zhenzhong1 Date: Fri, 13 Sep 2024 21:30:18 -0700 Subject: [PATCH 44/45] added tuned no wrapper eight 
manifests --- ...pper_tuned_eight_gaudi_without_rerank.yaml | 421 ++++++++++++++++++ 1 file changed, 421 insertions(+) create mode 100644 ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml diff --git a/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml new file mode 100644 index 000000000..36d9c4d2b --- /dev/null +++ b/ChatQnA/benchmark/tuned_no_wrapper/without_rerank/eight_gaudi/no_wrapper_tuned_eight_gaudi_without_rerank.yaml @@ -0,0 +1,421 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +data: + EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5 + EMBEDDING_SERVER_HOST_IP: embedding-dependency-svc + HUGGINGFACEHUB_API_TOKEN: ${HF_TOKEN} + INDEX_NAME: rag-redis + LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 + LLM_SERVER_HOST_IP: llm-dependency-svc + NODE_SELECTOR: chatqna-opea + REDIS_URL: redis://vector-db.default.svc.cluster.local:6379 + RERANK_MODEL_ID: BAAI/bge-reranker-base + RERANK_SERVER_HOST_IP: reranking-dependency-svc + RETRIEVER_SERVICE_HOST_IP: retriever-svc + TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006 + TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808 + TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009 +kind: ConfigMap +metadata: + name: qna-config + namespace: default +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: chatqna-backend-server-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: chatqna-backend-server-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: chatqna-backend-server-deploy + 
spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/chatqna-no-wrapper-without-rerank:latest + imagePullPolicy: IfNotPresent + name: chatqna-backend-server-deploy + ports: + - containerPort: 8888 + resources: + limits: + cpu: 8 + memory: 8000Mi + requests: + cpu: 8 + memory: 8000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: chatqna-backend-server-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: chatqna-backend-server-svc + namespace: default +spec: + ports: + - name: service + nodePort: 30888 + port: 8888 + targetPort: 8888 + selector: + app: chatqna-backend-server-deploy + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: dataprep-deploy + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: dataprep-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: dataprep-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/dataprep-redis:latest + imagePullPolicy: IfNotPresent + name: dataprep-deploy + ports: + - containerPort: 6007 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: dataprep-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: dataprep-svc + namespace: default +spec: + ports: + - name: port1 + port: 6007 + targetPort: 6007 + selector: + app: dataprep-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: embedding-dependency-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: embedding-dependency-deploy + 
template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: embedding-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(EMBEDDING_MODEL_ID) + - --auto-truncate + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + imagePullPolicy: IfNotPresent + name: embedding-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + cpu: 80 + memory: 20000Mi + requests: + cpu: 80 + memory: 20000Mi + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: embedding-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: embedding-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 6006 + targetPort: 80 + selector: + app: embedding-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: llm-dependency-deploy + namespace: default +spec: + replicas: 64 + selector: + matchLabels: + app: llm-dependency-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: llm-dependency-deploy + spec: + containers: + - args: + - --model-id + - $(LLM_MODEL_ID) + - --max-input-length + - '1280' + - --max-total-tokens + - '2048' + - --max-batch-total-tokens + - '65536' + - --max-batch-prefill-tokens + - '4096' + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: PT_HPU_ENABLE_LAZY_COLLECTIVES + value: 'true' + - name: runtime + value: habana + - name: HABANA_VISIBLE_DEVICES + value: 
all + - name: HF_TOKEN + value: ${HF_TOKEN} + envFrom: + - configMapRef: + name: qna-config + image: ghcr.io/huggingface/tgi-gaudi:2.0.4 + imagePullPolicy: IfNotPresent + name: llm-dependency-deploy + ports: + - containerPort: 80 + resources: + limits: + habana.ai/gaudi: 1 + securityContext: + capabilities: + add: + - SYS_NICE + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /dev/shm + name: shm + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: llm-dependency-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + volumes: + - hostPath: + path: /mnt/models + type: Directory + name: model-volume + - emptyDir: + medium: Memory + sizeLimit: 1Gi + name: shm +--- +apiVersion: v1 +kind: Service +metadata: + name: llm-dependency-svc + namespace: default +spec: + ports: + - name: service + port: 9009 + targetPort: 80 + selector: + app: llm-dependency-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: retriever-deploy + namespace: default +spec: + replicas: 4 + selector: + matchLabels: + app: retriever-deploy + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: retriever-deploy + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: opea/retriever-redis:latest + imagePullPolicy: IfNotPresent + name: retriever-deploy + ports: + - containerPort: 7000 + resources: + requests: + cpu: 4 + memory: 4000Mi + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: retriever-deploy + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: retriever-svc + namespace: default +spec: + ports: + - name: service + port: 7000 + 
targetPort: 7000 + selector: + app: retriever-deploy + type: ClusterIP +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: vector-db + namespace: default +spec: + replicas: 1 + selector: + matchLabels: + app: vector-db + template: + metadata: + annotations: + sidecar.istio.io/rewriteAppHTTPProbers: 'true' + labels: + app: vector-db + spec: + containers: + - envFrom: + - configMapRef: + name: qna-config + image: redis/redis-stack:7.2.0-v9 + imagePullPolicy: IfNotPresent + name: vector-db + ports: + - containerPort: 6379 + - containerPort: 8001 + hostIPC: true + nodeSelector: + node-type: chatqna-opea + serviceAccountName: default + topologySpreadConstraints: + - labelSelector: + matchLabels: + app: vector-db + maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway +--- +apiVersion: v1 +kind: Service +metadata: + name: vector-db + namespace: default +spec: + ports: + - name: vector-db-service + port: 6379 + targetPort: 6379 + - name: vector-db-insight + port: 8001 + targetPort: 8001 + selector: + app: vector-db + type: ClusterIP +--- From dd74d5e8a726416369fdc3386644f8bbd2c6a841 Mon Sep 17 00:00:00 2001 From: Spycsh Date: Fri, 13 Sep 2024 23:31:03 -0700 Subject: [PATCH 45/45] fix --- ChatQnA/docker_image_build/build.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ChatQnA/docker_image_build/build.yaml b/ChatQnA/docker_image_build/build.yaml index 5b53f996e..4dd1d3b74 100644 --- a/ChatQnA/docker_image_build/build.yaml +++ b/ChatQnA/docker_image_build/build.yaml @@ -31,7 +31,8 @@ services: image: ${REGISTRY:-opea}/chatqna-no-wrapper:${TAG:-latest} chatqna-no-wrapper-without-rerank: build: - dockerfile: ./Dockerfile_no_wrapper_without_rerank + context: ../ + dockerfile: ./Dockerfile.no_wrapper_without_rerank extends: chatqna image: ${REGISTRY:-opea}/chatqna-no-wrapper-without-rerank:${TAG:-latest} chatqna-ui: