tgi: Update tgi version on xeon to latest-intel-cpu (#318)
Fix issue #313

Signed-off-by: Lianhao Lu <lianhao.lu@intel.com>
Co-authored-by: Lianhao Lu <lianhao.lu@intel.com>
yongfengdu and lianhao authored Aug 19, 2024
1 parent 54cd66f commit c06bcea
Showing 14 changed files with 23 additions and 11 deletions.
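
For Xeon (CPU) deployments the common tgi chart now defaults to the latest-intel-cpu image, and a CUDA_GRAPHS value is introduced that the Gaudi and NVIDIA value files blank out. Anyone who would rather pin a fixed release than track latest-intel-cpu can still override the tag from a parent chart's values file; a minimal sketch, assuming the tgi subchart layout shown in the diffs below (the file name my-values.yaml and the pinned tag "2.2.0" are illustrative only):

# my-values.yaml -- hypothetical override file, passed to helm with -f
tgi:
  image:
    repository: ghcr.io/huggingface/text-generation-inference
    tag: "2.2.0"  # pin a released tag instead of the new default "latest-intel-cpu"

Values supplied this way take precedence over the chart defaults changed in this commit.
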
1 change: 1 addition & 0 deletions helm-charts/chatqna/gaudi-values.yaml
@@ -21,3 +21,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
2 changes: 1 addition & 1 deletion helm-charts/chatqna/nv-values.yaml
@@ -5,7 +5,7 @@
tgi:
image:
repository: ghcr.io/huggingface/text-generation-inference
-tag: "2.0"
+tag: "2.2.0"
resources:
limits:
nvidia.com/gpu: 1
6 changes: 3 additions & 3 deletions helm-charts/chatqna/values.yaml
@@ -40,9 +40,9 @@ tgi:
LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

global:
-http_proxy:
-https_proxy:
-no_proxy:
+http_proxy: ""
+https_proxy: ""
+no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
1 change: 1 addition & 0 deletions helm-charts/codegen/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
6 changes: 3 additions & 3 deletions helm-charts/codegen/values.yaml
@@ -40,9 +40,9 @@ tgi:
LLM_MODEL_ID: meta-llama/CodeLlama-7b-hf

global:
-http_proxy:
-https_proxy:
-no_proxy:
+http_proxy: ""
+https_proxy: ""
+no_proxy: ""
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
LANGCHAIN_TRACING_V2: false
LANGCHAIN_API_KEY: "insert-your-langchain-key-here"
1 change: 1 addition & 0 deletions helm-charts/codetrans/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
1 change: 1 addition & 0 deletions helm-charts/common/tgi/gaudi-values.yaml
@@ -11,6 +11,7 @@ image:

MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""

resources:
limits:
2 changes: 2 additions & 0 deletions helm-charts/common/tgi/nv-values.yaml
@@ -12,3 +12,5 @@ image:
resources:
limits:
nvidia.com/gpu: 1

+CUDA_GRAPHS: ""
3 changes: 3 additions & 0 deletions helm-charts/common/tgi/templates/configmap.yaml
@@ -27,3 +27,6 @@ data:
{{- if .Values.MAX_TOTAL_TOKENS }}
MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }}
{{- end }}
+{{- if .Values.CUDA_GRAPHS }}
+CUDA_GRAPHS: {{ .Values.CUDA_GRAPHS | quote }}
+{{- end }}
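
The new guard relies on Go-template truthiness: an empty string is false, so value files that set CUDA_GRAPHS: "" (the Gaudi and NVIDIA overrides above) drop the key from the rendered ConfigMap entirely, while the non-empty CPU default of "0" is passed through. A rough sketch of the two rendered outcomes, with the unrelated ConfigMap fields elided:

# When CUDA_GRAPHS is "0" (the new CPU default), the key is rendered:
data:
  CUDA_GRAPHS: "0"
# When CUDA_GRAPHS is "" (the Gaudi/NVIDIA overrides), the guard is false and
# no CUDA_GRAPHS entry appears in the ConfigMap at all.
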
3 changes: 2 additions & 1 deletion helm-charts/common/tgi/values.yaml
@@ -13,7 +13,7 @@ image:
repository: ghcr.io/huggingface/text-generation-inference
pullPolicy: IfNotPresent
# Overrides the image tag whose default is the chart appVersion.
-tag: "2.1.0"
+tag: "latest-intel-cpu"

imagePullSecrets: []
nameOverride: ""
@@ -100,6 +100,7 @@ LLM_MODEL_ID: Intel/neural-chat-7b-v3-3

MAX_INPUT_LENGTH: ""
MAX_TOTAL_TOKENS: ""
+CUDA_GRAPHS: "0"

global:
http_proxy: ""
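
The tag sits next to the stock helm-create comment about falling back to the chart appVersion, which suggests the deployment template composes the image reference in the conventional way; a hedged sketch of that pattern (assumed, not copied from this repository, but consistent with the regenerated manifest further down):

# templates/deployment.yaml -- conventional helm-create fragment (assumed)
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"

With the new default, CPU deployments therefore resolve to ghcr.io/huggingface/text-generation-inference:latest-intel-cpu, which is exactly what the microservices-connector manifest below renders.
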
1 change: 1 addition & 0 deletions helm-charts/docsum/gaudi-values.yaml
@@ -10,3 +10,4 @@ tgi:
habana.ai/gaudi: 1
MAX_INPUT_LENGTH: "1024"
MAX_TOTAL_TOKENS: "2048"
+CUDA_GRAPHS: ""
3 changes: 2 additions & 1 deletion microservices-connector/config/manifests/tgi.yaml
@@ -24,6 +24,7 @@ data:
NUMBA_CACHE_DIR: "/tmp"
TRANSFORMERS_CACHE: "/tmp/transformers_cache"
HF_HOME: "/tmp/.cache/huggingface"
+CUDA_GRAPHS: "0"
---
# Source: tgi/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
@@ -88,7 +89,7 @@ spec:
optional: true
securityContext:
{}
-image: "ghcr.io/huggingface/text-generation-inference:2.1.0"
+image: "ghcr.io/huggingface/text-generation-inference:latest-intel-cpu"
imagePullPolicy: IfNotPresent
volumeMounts:
- mountPath: /data
2 changes: 1 addition & 1 deletion microservices-connector/config/samples/codegen_gaudi.yaml
@@ -29,6 +29,6 @@ spec:
internalService:
serviceName: tgi-gaudi-svc
config:
-MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+MODEL_ID: meta-llama/CodeLlama-7b-hf
endpoint: /generate
isDownstreamService: true
2 changes: 1 addition & 1 deletion microservices-connector/config/samples/codegen_xeon.yaml
@@ -29,6 +29,6 @@ spec:
internalService:
serviceName: tgi-service
config:
-MODEL_ID: ise-uiuc/Magicoder-S-DS-6.7B
+MODEL_ID: meta-llama/CodeLlama-7b-hf
endpoint: /generate
isDownstreamService: true
