From 71282c7ee9ebaf27ae2a4af0ed3cf27d91aa57af Mon Sep 17 00:00:00 2001
From: Steve Zhang
Date: Fri, 20 Sep 2024 09:19:46 +0800
Subject: [PATCH] 'ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu'
 is the Intel CPU optimized TGI image; use it for all Xeon platforms. (#444)

Signed-off-by: zhlsunshine
(cherry picked from commit c84ac4c74c9d5f1474a4966d0107830db2963d1d)
---
 helm-charts/common/tgi/values.yaml                          | 3 ++-
 microservices-connector/config/manifests/tgi.yaml           | 2 +-
 microservices-connector/config/samples/ChatQnA/use_cases.md | 2 +-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml
index 805df10b..97ef2e59 100644
--- a/helm-charts/common/tgi/values.yaml
+++ b/helm-charts/common/tgi/values.yaml
@@ -26,7 +26,8 @@ image:
   repository: ghcr.io/huggingface/text-generation-inference
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "2.2.0"
+  # `sha-e4201f4-intel-cpu` is the tag of the Intel CPU optimized TGI image
+  tag: "sha-e4201f4-intel-cpu"
 
 # empty for CPU
 accelDevice: ""

diff --git a/microservices-connector/config/manifests/tgi.yaml b/microservices-connector/config/manifests/tgi.yaml
index aa1f4cec..cece9855 100644
--- a/microservices-connector/config/manifests/tgi.yaml
+++ b/microservices-connector/config/manifests/tgi.yaml
@@ -87,7 +87,7 @@ spec:
             optional: true
       securityContext: {}
-      image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
+      image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
      imagePullPolicy: IfNotPresent
      volumeMounts:
        - mountPath: /data

diff --git a/microservices-connector/config/samples/ChatQnA/use_cases.md b/microservices-connector/config/samples/ChatQnA/use_cases.md
index c8acc2c9..e18ae2f7 100644
--- a/microservices-connector/config/samples/ChatQnA/use_cases.md
+++ b/microservices-connector/config/samples/ChatQnA/use_cases.md
@@ -19,7 +19,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - dataprep-redis: opea/dataprep-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.2.0
+- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
 - redis-vector-db: redis/redis-stack:7.2.0-v9
 
 Should you desire to use the Gaudi accelerator, two alternate images are used for the embedding and llm services.
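
With this change applied, the chart's default image tag already points at the
Intel CPU optimized TGI image, so a plain install picks it up. A minimal
sketch of installing the tgi chart with the tag pinned explicitly, assuming
the chart is deployed from this repo's helm-charts/common/tgi directory and a
release name of `tgi` (both are illustrative, not mandated by the chart):

    # Install or upgrade the tgi release; --set overrides image.tag in values.yaml
    helm upgrade --install tgi ./helm-charts/common/tgi \
      --set image.tag=sha-e4201f4-intel-cpu

    # Render the templates and confirm the Deployment references the intel-cpu tag
    helm template tgi ./helm-charts/common/tgi | grep "image:"

Pinning the tag via --set is redundant once this patch lands, but it is a
convenient way to roll a Xeon deployment forward to a newer intel-cpu build
without editing values.yaml.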