Skip to content

Commit

Permalink
Make rerank run on gaudi for hpu docker compose (#980)
Browse files (browse the repository at this point in the history)
Signed-off-by: lvliang-intel <liang1.lv@intel.com>
  • Loading branch information
lvliang-intel authored Oct 18, 2024
1 parent 7669c42 commit 3c164f3
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 16 deletions.
15 changes: 10 additions & 5 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,11 +40,11 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
Expand All @@ -65,20 +65,25 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
Expand Down
13 changes: 10 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -79,6 +79,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
Expand All @@ -102,20 +104,25 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
tgi-service:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
Expand Down
13 changes: 10 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,6 +40,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
Expand All @@ -63,20 +65,25 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-service:
image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest}
Expand Down
13 changes: 10 additions & 3 deletions ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,6 +40,8 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
Expand All @@ -63,20 +65,25 @@ services:
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
restart: unless-stopped
tei-reranking-service:
image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5
image: ghcr.io/huggingface/tei-gaudi:latest
container_name: tei-reranking-gaudi-server
ports:
- "8808:80"
volumes:
- "./data:/data"
shm_size: 1g
runtime: habana
cap_add:
- SYS_NICE
ipc: host
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
command: --model-id ${RERANK_MODEL_ID} --auto-truncate
vllm-ray-service:
image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest}
Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,11 +40,11 @@ services:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
MAX_WARMUP_SEQUENCE_LENGTH: 512
INIT_HCCL_ON_ACQUIRE: 0
ENABLE_EXPERIMENTAL_FLAGS: true
command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate
retriever:
image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest}
Expand Down

0 comments on commit 3c164f3

Please sign in to comment.