diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml
new file mode 100644
index 000000000..24b8e72df
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/chatqna_config_map.yaml
@@ -0,0 +1,23 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: qna-config
+  namespace: default
+data:  # every value below must be a string
+  EMBEDDING_MODEL_ID: BAAI/bge-base-en-v1.5
+  RERANK_MODEL_ID: BAAI/bge-reranker-base
+  LLM_MODEL_ID: Intel/neural-chat-7b-v3-3
+  TEI_EMBEDDING_ENDPOINT: http://embedding-dependency-svc.default.svc.cluster.local:6006
+  TEI_RERANKING_ENDPOINT: http://reranking-dependency-svc.default.svc.cluster.local:8808
+  TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
+  REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
+  INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: "{HF_TOKEN}"  # placeholder; quoted so YAML reads a string, not a flow mapping
+  EMBEDDING_SERVICE_HOST_IP: embedding-svc
+  RETRIEVER_SERVICE_HOST_IP: retriever-svc
+  RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
+  LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml
new file mode 100644
index 000000000..6e93eb867
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/chatqna_mega_service_run.yaml
@@ -0,0 +1,62 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: chatqna-backend-server-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: chatqna-backend-server-deploy
+  template:
+    metadata:
+      labels:
+        app: chatqna-backend-server-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: chatqna-backend-server-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: chatqna-backend-server-deploy
+        image: opea/chatqna:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8888
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 8
+            memory: 4000Mi
+          limits:
+            cpu: 8
+            memory: 4000Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: chaqna-backend-server-svc  # NOTE(review): "chaqna" lacks the "t" used by the Deployment; confirm with clients before renaming
+spec:
+  selector:
+    app: chatqna-backend-server-deploy
+  type: NodePort
+  ports:
+  - name: service
+    nodePort: 30888  # port opened on each node
+    port: 8888
+    targetPort: 8888
diff --git a/ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml
new file mode 100644
index 000000000..14fae684c
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/dataprep-microservice_run.yaml
@@ -0,0 +1,70 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: dataprep-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: dataprep-deploy
+  template:
+    metadata:
+      labels:
+        app: dataprep-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: dataprep-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: dataprep-deploy
+        image: opea/dataprep-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # only these two keys are taken from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:  # each port is also published by dataprep-svc
+        - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: dataprep-svc
+spec:
+  selector:
+    app: dataprep-deploy
+  type: ClusterIP
+  ports:  # one entry per container port of dataprep-deploy
+  - name: port1
+    targetPort: 6007
+    port: 6007
+  - name: port2
+    targetPort: 6008
+    port: 6008
+  - name: port3
+    targetPort: 6009
+    port: 6009
diff --git a/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml
new file mode 100644
index 000000000..7dcb10342
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/embedding-dependency_run.yaml
@@ -0,0 +1,69 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-dependency-deploy
+spec:
+  replicas: 6
+  selector:
+    matchLabels:
+      app: embedding-dependency-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-dependency-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      volumes:
+      - name: model-volume
+        hostPath:
+          type: Directory  # the path must already exist on the node
+          path: /home/sdp/cesg
+      - name: shm
+        emptyDir:
+          sizeLimit: 1Gi
+          medium: Memory
+      containers:
+      - name: embedding-dependency-deploy
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
+        args:
+        - --model-id
+        - $(EMBEDDING_MODEL_ID)  # expanded from the qna-config key loaded via envFrom
+        - --auto-truncate
+        envFrom:
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 80
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 80
+            memory: 20000Mi
+          limits:
+            cpu: 80
+            memory: 20000Mi
+        volumeMounts:
+        - name: model-volume
+          mountPath: /data
+        - name: shm
+          mountPath: /dev/shm
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: embedding-dependency-svc
+spec:
+  selector:
+    app: embedding-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of embedding-dependency-deploy
+    port: 6006  # port used by TEI_EMBEDDING_ENDPOINT in qna-config
diff --git a/ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml
new file mode 100644
index 000000000..f23ba0b4f
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/embedding-microservice_run.yaml
@@ -0,0 +1,59 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: embedding-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: embedding-deploy
+  template:
+    metadata:
+      labels:
+        app: embedding-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: embedding-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: embedding-deploy
+        image: opea/embedding-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 6000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: embedding-svc  # same value as EMBEDDING_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: embedding-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 6000
+    port: 6000
diff --git a/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml
new file mode 100644
index 000000000..17766cd48
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/llm-dependency_run.yaml
@@ -0,0 +1,88 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: llm-dependency-deploy
+  namespace: default
+spec:
+  replicas: 31  # one Gaudi per replica; presumably sized to the four-node cluster, TODO confirm
+  selector:
+    matchLabels:
+      app: llm-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: llm-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      hostIPC: true  # pod uses the host's IPC namespace
+      containers:
+      - envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        image: tgi_gaudi:2.0.1
+        name: llm-dependency-deploy-demo
+        securityContext:
+          capabilities:
+            add:
+            - SYS_NICE
+        args:
+        - --model-id
+        - $(LLM_MODEL_ID)  # expanded from the qna-config key loaded via envFrom
+        - --max-input-length
+        - '1024'
+        - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
+        - '4096'
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: $(HUGGINGFACEHUB_API_TOKEN)  # taken from qna-config; a $(HF_TOKEN) self-reference would stay unexpanded
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /home/sdp/cesg
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: llm-dependency-svc
+spec:
+  selector:
+    app: llm-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of llm-dependency-deploy
+    port: 9009  # port used by TGI_LLM_ENDPOINT in qna-config
diff --git a/ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml
new file mode 100644
index 000000000..1d9e29112
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/llm-microservice_run.yaml
@@ -0,0 +1,59 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: llm-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: llm-deploy
+  template:
+    metadata:
+      labels:
+        app: llm-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: llm-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: llm-deploy
+        image: opea/llm-tgi:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 9000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: llm-svc  # same value as LLM_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: llm-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 9000
+    port: 9000
diff --git a/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml
new file mode 100644
index 000000000..3f595ae1e
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/reranking-dependency_run.yaml
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: reranking-dependency-deploy
+  namespace: default
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-dependency-deploy
+  template:
+    metadata:
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: 'true'
+      labels:
+        app: reranking-dependency-deploy
+    spec:
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - maxSkew: 1
+        topologyKey: kubernetes.io/hostname
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+        labelSelector:
+          matchLabels:
+            app: reranking-dependency-deploy
+      containers:
+      - envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        image: tei_gaudi:rerank
+        name: reranking-dependency-deploy
+        args:
+        - --model-id
+        - $(RERANK_MODEL_ID)  # expanded from the qna-config key loaded via envFrom
+        - --auto-truncate
+        volumeMounts:
+        - mountPath: /data
+          name: model-volume
+        - mountPath: /dev/shm
+          name: shm
+        ports:
+        - containerPort: 80
+        resources:
+          limits:
+            habana.ai/gaudi: 1
+        env:
+        - name: OMPI_MCA_btl_vader_single_copy_mechanism
+          value: none
+        - name: PT_HPU_ENABLE_LAZY_COLLECTIVES
+          value: 'true'
+        - name: runtime
+          value: habana
+        - name: HABANA_VISIBLE_DEVICES
+          value: all
+        - name: HF_TOKEN
+          value: $(HUGGINGFACEHUB_API_TOKEN)  # taken from qna-config; a $(HF_TOKEN) self-reference would stay unexpanded
+        - name: MAX_WARMUP_SEQUENCE_LENGTH
+          value: '512'
+      serviceAccountName: default
+      volumes:
+      - name: model-volume
+        hostPath:
+          path: /home/sdp/cesg
+          type: Directory  # the path must already exist on the node
+      - name: shm
+        emptyDir:
+          medium: Memory
+          sizeLimit: 1Gi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: reranking-dependency-svc
+spec:
+  selector:
+    app: reranking-dependency-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 80  # container port of reranking-dependency-deploy
+    port: 8808  # port used by TEI_RERANKING_ENDPOINT in qna-config
diff --git a/ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml
new file mode 100644
index 000000000..25f6a00b3
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/reranking-microservice_run.yaml
@@ -0,0 +1,59 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: reranking-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: reranking-deploy
+  template:
+    metadata:
+      labels:
+        app: reranking-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: reranking-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: reranking-deploy
+        image: opea/reranking-tei:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        envFrom:  # imports every key of qna-config as an environment variable
+        - configMapRef:
+            name: qna-config
+        ports:
+        - containerPort: 8000
+        resources:  # CPU only; no memory request or limit is declared
+          requests:
+            cpu: 4
+          limits:
+            cpu: 4
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: reranking-svc  # same value as RERANK_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: reranking-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 8000
+    port: 8000
diff --git a/ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml
new file mode 100644
index 000000000..40040ee5b
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/retrieval-microservice_run.yaml
@@ -0,0 +1,69 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  namespace: default
+  name: retriever-deploy
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: retriever-deploy
+  template:
+    metadata:
+      labels:
+        app: retriever-deploy
+      annotations:
+        sidecar.istio.io/rewriteAppHTTPProbers: "true"
+    spec:
+      serviceAccountName: default
+      hostIPC: true  # pod uses the host's IPC namespace
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: retriever-deploy
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+      containers:
+      - name: retriever-deploy
+        image: opea/retriever-redis:latest
+        imagePullPolicy: IfNotPresent
+        args: null
+        env:  # only these two keys are taken from qna-config
+        - name: REDIS_URL
+          valueFrom:
+            configMapKeyRef:
+              key: REDIS_URL
+              name: qna-config
+        - name: INDEX_NAME
+          valueFrom:
+            configMapKeyRef:
+              key: INDEX_NAME
+              name: qna-config
+        ports:
+        - containerPort: 7000
+        resources:  # requests are set equal to limits
+          requests:
+            cpu: 8
+            memory: 2500Mi
+          limits:
+            cpu: 8
+            memory: 2500Mi
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: retriever-svc  # same value as RETRIEVER_SERVICE_HOST_IP in qna-config
+spec:
+  selector:
+    app: retriever-deploy
+  type: ClusterIP
+  ports:
+  - name: service
+    targetPort: 7000
+    port: 7000
diff --git a/ChatQnA/benchmark/four_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/four_gaudi/vector-db_run.yaml
new file mode 100644
index 000000000..e04e8c5fe
--- /dev/null
+++ b/ChatQnA/benchmark/four_gaudi/vector-db_run.yaml
@@ -0,0 +1,48 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: vector-db  # NOTE(review): no namespace set here, but REDIS_URL in qna-config expects "default"
+spec:
+  selector:
+    matchLabels:
+      app: vector-db
+  replicas: 1
+  template:
+    metadata:
+      labels:
+        app: vector-db
+    spec:
+      containers:
+      - name: vector-db
+        image: redis/redis-stack:7.2.0-v9
+        ports:  # both ports are published by the vector-db Service
+        - containerPort: 6379
+        - containerPort: 8001
+      nodeSelector:
+        node-type: chatqna-opea  # only nodes labelled node-type=chatqna-opea
+      topologySpreadConstraints:
+      - labelSelector:
+          matchLabels:
+            app: vector-db
+        topologyKey: kubernetes.io/hostname
+        maxSkew: 1
+        whenUnsatisfiable: ScheduleAnyway  # spreading is a preference, not a hard rule
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: vector-db  # host part of REDIS_URL in qna-config
+spec:
+  selector:
+    app: vector-db
+  type: ClusterIP
+  ports:
+  - name: vector-db-service
+    targetPort: 6379
+    port: 6379  # port used by REDIS_URL in qna-config
+  - name: vector-db-insight
+    targetPort: 8001
+    port: 8001
diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml
index 0fbc55dbc..24b8e72df 100644
--- a/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/single_gaudi/chatqna_config_map.yaml
@@ -15,7 +15,9 @@ data:
   TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
   REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
   INDEX_NAME: rag-redis
+  HUGGINGFACEHUB_API_TOKEN: "{HF_TOKEN}"  # placeholder; quoted so YAML reads a string, not a flow mapping
   EMBEDDING_SERVICE_HOST_IP: embedding-svc
   RETRIEVER_SERVICE_HOST_IP: retriever-svc
   RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
   LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml
index e255b17e7..6e93eb867 100644
--- a/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/chatqna_mega_service_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: chatqna-backend-server-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml
index de4d0716a..14fae684c 100644
--- a/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/dataprep-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: dataprep-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml
index 11d35cfcf..d692876aa 100644
--- a/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/embedding-dependency_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: embedding-dependency-deploy
   namespace: default
 spec:
-  replicas: 4
+  replicas: 1
   selector:
     matchLabels:
       app: embedding-dependency-deploy
@@ -18,11 +18,13 @@ spec:
       labels:
         app: embedding-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       containers:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
         name: embedding-dependency-deploy
         args:
         - --model-id
@@ -37,11 +39,11 @@ spec:
         - containerPort: 80
         resources:
           limits:
-            cpu: 24
-            memory: 4000Mi
+            cpu: 80
+            memory: 20000Mi
           requests:
-            cpu: 24
-            memory: 4000Mi
+            cpu: 80
+            memory: 20000Mi
       serviceAccountName: default
       volumes:
       - name: model-volume
diff --git a/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml
index 2427872ff..f23ba0b4f 100644
--- a/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/embedding-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: embedding-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml
index fbc0eac7e..eb49bdfdf 100644
--- a/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/llm-dependency_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: llm-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       hostIPC: true
       containers:
       - envFrom:
@@ -33,8 +35,12 @@ spec:
         - --model-id
         - $(LLM_MODEL_ID)
         - --max-input-length
-        - '2048'
+        - '1024'
         - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
         - '4096'
         volumeMounts:
         - mountPath: /data
diff --git a/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml
index 8afbc3467..1d9e29112 100644
--- a/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/llm-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: llm-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml
index 0fac53c97..3f595ae1e 100644
--- a/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/reranking-dependency_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: reranking-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml
index 24fab63fe..25f6a00b3 100644
--- a/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/reranking-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: reranking-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml
index e16505fcc..40040ee5b 100644
--- a/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/retrieval-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: retriever-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml
index 704d79d32..e04e8c5fe 100644
--- a/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/single_gaudi/vector-db_run.yaml
@@ -15,6 +15,8 @@ spec:
       labels:
         app: vector-db
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml
index 5449307ed..24b8e72df 100644
--- a/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml
+++ b/ChatQnA/benchmark/two_gaudi/chatqna_config_map.yaml
@@ -15,8 +15,9 @@ data:
   TGI_LLM_ENDPOINT: http://llm-dependency-svc.default.svc.cluster.local:9009
   REDIS_URL: redis://vector-db.default.svc.cluster.local:6379
   INDEX_NAME: rag-redis
-  HUGGINGFACEHUB_API_TOKEN: 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
+  HUGGINGFACEHUB_API_TOKEN: "{HF_TOKEN}"  # placeholder; quoted so YAML reads a string, not a flow mapping
   EMBEDDING_SERVICE_HOST_IP: embedding-svc
   RETRIEVER_SERVICE_HOST_IP: retriever-svc
   RERANK_SERVICE_HOST_IP: reranking-svc
+  NODE_SELECTOR: chatqna-opea
   LLM_SERVICE_HOST_IP: llm-svc
diff --git a/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml b/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml
index e255b17e7..6e93eb867 100644
--- a/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/chatqna_mega_service_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: chatqna-backend-server-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml b/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml
index a9542d79d..14fae684c 100644
--- a/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/dataprep-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: dataprep-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
@@ -44,6 +46,8 @@ spec:
         args: null
         ports:
         - containerPort: 6007
+        - containerPort: 6008
+        - containerPort: 6009
       serviceAccountName: default
 ---
 kind: Service
@@ -58,3 +62,9 @@ spec:
   - name: port1
     port: 6007
     targetPort: 6007
+  - name: port2
+    port: 6008
+    targetPort: 6008
+  - name: port3
+    port: 6009
+    targetPort: 6009
diff --git a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
index f8e8bbed3..89d40715e 100644
--- a/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/embedding-dependency_run.yaml
@@ -7,7 +7,7 @@ metadata:
   name: embedding-dependency-deploy
   namespace: default
 spec:
-  replicas: 10
+  replicas: 3
   selector:
     matchLabels:
       app: embedding-dependency-deploy
@@ -18,11 +18,13 @@ spec:
       labels:
         app: embedding-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       containers:
       - envFrom:
         - configMapRef:
             name: qna-config
-        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.2
+        image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
         name: embedding-dependency-deploy
         args:
         - --model-id
@@ -37,11 +39,11 @@ spec:
         - containerPort: 80
         resources:
           limits:
-            cpu: 24
-            memory: 4000Mi
+            cpu: 80
+            memory: 20000Mi
           requests:
-            cpu: 24
-            memory: 4000Mi
+            cpu: 80
+            memory: 20000Mi
       serviceAccountName: default
       volumes:
       - name: model-volume
diff --git a/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml b/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml
index 2427872ff..f23ba0b4f 100644
--- a/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/embedding-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: embedding-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml
index 409a151b8..6191a9522 100644
--- a/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/llm-dependency_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: llm-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       hostIPC: true
       containers:
       - envFrom:
@@ -33,8 +35,12 @@ spec:
         - --model-id
         - $(LLM_MODEL_ID)
         - --max-input-length
-        - '2048'
+        - '1024'
         - --max-total-tokens
+        - '2048'
+        - --max-batch-total-tokens
+        - '65536'
+        - --max-batch-prefill-tokens
         - '4096'
         volumeMounts:
         - mountPath: /data
diff --git a/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml b/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml
index 8afbc3467..1d9e29112 100644
--- a/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/llm-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: llm-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml
index 0fac53c97..3f595ae1e 100644
--- a/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/reranking-dependency_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: reranking-dependency-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml b/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml
index 24fab63fe..25f6a00b3 100644
--- a/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/reranking-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: reranking-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml b/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml
index e16505fcc..40040ee5b 100644
--- a/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/retrieval-microservice_run.yaml
@@ -18,6 +18,8 @@ spec:
       labels:
         app: retriever-deploy
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname
diff --git a/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml b/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml
index 704d79d32..e04e8c5fe 100644
--- a/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml
+++ b/ChatQnA/benchmark/two_gaudi/vector-db_run.yaml
@@ -15,6 +15,8 @@ spec:
       labels:
         app: vector-db
     spec:
+      nodeSelector:
+        node-type: chatqna-opea
       topologySpreadConstraints:
       - maxSkew: 1
         topologyKey: kubernetes.io/hostname