opea-project · lianhao · Sep 20, 2024 · Sep 19, 2024
@@ -26,7 +26,8 @@ image:
   repository: ghcr.io/huggingface/text-generation-inference
   pullPolicy: IfNotPresent
   # Overrides the image tag whose default is the chart appVersion.
-  tag: "2.2.0"
+  # `sha-e4201f4-intel-cpu` is the tag of the Intel CPU-optimized TGI image.
+  tag: "sha-e4201f4-intel-cpu"
 
 # empty for CPU
 accelDevice: ""

@@ -87,7 +87,7 @@ spec:
                 optional: true
           securityContext:
             {}
-          image: "ghcr.io/huggingface/text-generation-inference:2.2.0"
+          image: "ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu"
           imagePullPolicy: IfNotPresent
           volumeMounts:
             - mountPath: /data

@@ -19,7 +19,7 @@ The ChatQnA uses the below prebuilt images if you choose a Xeon deployment
 - dataprep-redis: opea/dataprep-redis:latest
 - tei_xeon_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
 - tei_embedding_service: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
-- tgi-service: ghcr.io/huggingface/text-generation-inference:2.2.0
+- tgi-service: ghcr.io/huggingface/text-generation-inference:sha-e4201f4-intel-cpu
 - redis-vector-db: redis/redis-stack:7.2.0-v9
 
 If you want to use the Gaudi accelerator, two alternate images are used for the embedding and LLM services.