tgi: Update tgi version on xeon to latest-intel-cpu (#318)
Fix issue #313

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
Co-authored-by: Lianhao Lu <lianhao.lu@intel.com>
yongfengdu and lianhao authored Aug 19, 2024
1 parent 54cd66f commit c06bcea
Showing 14 changed files with 23 additions and 11 deletions.
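
For Xeon (CPU) deployments the common tgi chart now defaults to the latest-intel-cpu image, and a CUDA_GRAPHS value is introduced that the Gaudi and NVIDIA value files blank out. Anyone who would rather pin a fixed release than track latest-intel-cpu can still override the tag from a parent chart's values file; a minimal sketch, assuming the tgi subchart layout shown in the diffs below (the file name my-values.yaml and the pinned tag "2.2.0" are illustrative only):

# my-values.yaml -- hypothetical override file, passed to helm with -f
tgi:
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.2.0"  # pin a released tag instead of the new default "latest-intel-cpu"

Values supplied this way take precedence over the chart defaults changed in this commit.
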
1 change: 1 addition & 0 deletions helm-charts/chatqna/gaudi-values.yaml
@@ -21,3 +21,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
2 changes: 1 addition & 1 deletion helm-charts/chatqna/nv-values.yaml
@@ -5,7 +5,7 @@
tgi:
image:
repository: ghcr.io/huggingface/text-generation-inference
-tag: "2.0"
+tag: "2.2.0"
resources:
limits:
nvidia.com/gpu: 1
6 changes: 3 additions & 3 deletions helm-charts/chatqna/values.yaml
@@ -40,9 +40,9 @@ tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
-http_proxy:
-https_proxy:
-no_proxy:
+http_proxy: ""
+https_proxy: ""
+no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
1 change: 1 addition & 0 deletions helm-charts/codegen/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
6 changes: 3 additions & 3 deletions helm-charts/codegen/values.yaml
@@ -40,9 +40,9 @@ tgi:
LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf

global:
-http_proxy:
-https_proxy:
-no_proxy:
+http_proxy: ""
+https_proxy: ""
+no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
1 change: 1 addition & 0 deletions helm-charts/codetrans/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
1 change: 1 addition & 0 deletions helm-charts/common/tgi/gaudi-values.yaml
@@ -11,6 +11,7 @@ image:

MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""

resources:
limits:
2 changes: 2 additions & 0 deletions helm-charts/common/tgi/nv-values.yaml
@@ -12,3 +12,5 @@ image:
resources:
limits:
nvidia.com/gpu: 1

+CUDA_GRAPHS: ""
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
@@ -27,3 +27,6 @@ data:
{{- if .Values.MAX_TOTAL_TOKENS }}
MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
{{- end }}
+{{- if .Values.CUDA_GRAPHS }}
+CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }}
+{{- end }}
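
The new guard relies on Go-template truthiness: an empty string is false, so value files that set CUDA_GRAPHS: "" (the Gaudi and NVIDIA overrides above) drop the key from the rendered ConfigMap entirely, while the non-empty CPU default of "0" is passed through. A rough sketch of the two rendered outcomes, with the unrelated ConfigMap fields elided:

# When CUDA_GRAPHS is "0" (the new CPU default), the key is rendered:
data:
  CUDA_GRAPHS: "0"
# When CUDA_GRAPHS is "" (the Gaudi/NVIDIA overrides), the guard is false and
# no CUDA_GRAPHS entry appears in the ConfigMap at all.
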
3 changes: 2 additions & 1 deletion helm-charts/common/tgi/values.yaml
@@ -13,7 +13,7 @@ image:
repository: ghcr.io/huggingface/text-generation-inference
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
-tag: "2.1.0"
+tag: "latest-intel-cpu"

imagePullSecrets: []
nameOverride: ""
@@ -100,6 +100,7 @@ LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

MAX_INPUT_LENGTH: ""
MAX_TOTAL_TOKENS: ""
+CUDA_GRAPHS: "0"

global:
http_proxy: ""
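
The tag sits next to the stock helm-create comment about falling back to the chart appVersion, which suggests the deployment template composes the image reference in the conventional way; a hedged sketch of that pattern (assumed, not copied from this repository, but consistent with the regenerated manifest further down):

# templates/deployment.yaml -- conventional helm-create fragment (assumed)
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"

With the new default, CPU deployments therefore resolve to ghcr.io/huggingface/text-generation-inference:latest-intel-cpu, which is exactly what the microservices-connector manifest below renders.
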
1 change: 1 addition & 0 deletions helm-charts/docsum/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
3 changes: 2 additions & 1 deletion microservices-connector/config/manifests/tgi.yaml
@@ -24,6 +24,7 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
+CUDA_GRAPHS: "0"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
@@ -88,7 +89,7 @@ spec:
optional: true
securityContext:
{}
-image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
2 changes: 1 addition & 1 deletion microservices-connector/config/samples/codegen_gaudi.yaml
@@ -29,6 +29,6 @@ spec:
internalService:
serviceName: tgi-gaudi-svc
config:
-MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+MODEL_ID: meta-llama/CodeLlama-7b-hf
endpoint: /generate
isDownstreamService: true
2 changes: 1 addition & 1 deletion microservices-connector/config/samples/codegen_xeon.yaml
@@ -29,6 +29,6 @@ spec:
internalService:
serviceName: tgi-service
config:
-MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+MODEL_ID: meta-llama/CodeLlama-7b-hf
endpoint: /generate
isDownstreamService: true
