From 75e0aa4d57607117e7bba07f370c2fc260a1cac5 Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Fri, 18 Oct 2024 15:29:36 +0800 Subject: [PATCH 1/2] Make rerank run on gaudi for hpu docker compose Signed-off-by: lvliang-intel --- .../docker_compose/intel/hpu/gaudi/compose.yaml | 15 ++++++++------- .../intel/hpu/gaudi/compose_guardrails.yaml | 13 ++++++++----- .../intel/hpu/gaudi/compose_vllm.yaml | 13 ++++++++----- .../intel/hpu/gaudi/compose_vllm_ray.yaml | 13 ++++++++----- .../intel/hpu/gaudi/compose_without_rerank.yaml | 2 -- 5 files changed, 32 insertions(+), 24 deletions(-) diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index 9036c2ccf..fa433960a 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -43,8 +43,6 @@ services: HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 - INIT_HCCL_ON_ACQUIRE: 0 - ENABLE_EXPERIMENTAL_FLAGS: true command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} @@ -65,20 +63,23 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + image: ghcr.io/huggingface/tei-gaudi:latest container_name: tei-reranking-gaudi-server ports: - "8808:80" volumes: - "./data:/data" - shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: image: ghcr.io/huggingface/tgi-gaudi:2.0.5 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 3edb7bba7..80703b557 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -102,20 +102,23 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + image: ghcr.io/huggingface/tei-gaudi:latest container_name: tei-reranking-gaudi-server ports: - "8808:80" volumes: - "./data:/data" - shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate tgi-service: image: ghcr.io/huggingface/tgi-gaudi:2.0.5 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml index 162527b10..0898fa42f 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml @@ -63,20 +63,23 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + image: ghcr.io/huggingface/tei-gaudi:latest container_name: tei-reranking-gaudi-server ports: - "8808:80" volumes: - "./data:/data" - shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-service: image: ${REGISTRY:-opea}/llm-vllm-hpu:${TAG:-latest} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml index f8b9fc7e3..67cf3e932 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml @@ -63,20 +63,23 @@ services: HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} restart: unless-stopped tei-reranking-service: - image: ghcr.io/huggingface/text-embeddings-inference:cpu-1.5 + image: ghcr.io/huggingface/tei-gaudi:latest container_name: tei-reranking-gaudi-server ports: - "8808:80" volumes: - "./data:/data" - shm_size: 1g + runtime: habana + cap_add: + - SYS_NICE + ipc: host environment: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} - HF_HUB_DISABLE_PROGRESS_BARS: 1 - HF_HUB_ENABLE_HF_TRANSFER: 0 + HABANA_VISIBLE_DEVICES: all + OMPI_MCA_btl_vader_single_copy_mechanism: none + MAX_WARMUP_SEQUENCE_LENGTH: 512 command: --model-id ${RERANK_MODEL_ID} --auto-truncate vllm-ray-service: image: ${REGISTRY:-opea}/llm-vllm-ray-hpu:${TAG:-latest} diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index cf5a7c2ca..4306d678e 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -43,8 +43,6 @@ services: HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 - INIT_HCCL_ON_ACQUIRE: 0 - ENABLE_EXPERIMENTAL_FLAGS: true command: --model-id ${EMBEDDING_MODEL_ID} --auto-truncate retriever: image: ${REGISTRY:-opea}/retriever-redis:${TAG:-latest} From 43270e5f07d9814a4bc01f0316d5654a7cd3140b Mon Sep 17 00:00:00 2001 From: lvliang-intel Date: Fri, 18 Oct 2024 17:43:56 +0800 Subject: [PATCH 2/2] add back envs Signed-off-by: lvliang-intel --- ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml | 4 ++++ .../docker_compose/intel/hpu/gaudi/compose_guardrails.yaml | 4 ++++ ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml | 4 ++++ ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml | 4 ++++ .../intel/hpu/gaudi/compose_without_rerank.yaml | 2 ++ 5 files changed, 18 insertions(+) diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml index fa433960a..20a8e9ffc 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose.yaml @@ -40,6 +40,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 @@ -77,6 +79,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml index 80703b557..320ac6140 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_guardrails.yaml @@ -79,6 +79,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 @@ -116,6 +118,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml index 0898fa42f..0d7035cb6 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm.yaml @@ -40,6 +40,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 @@ -77,6 +79,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml index 67cf3e932..296c5df52 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_vllm_ray.yaml @@ -40,6 +40,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 @@ -77,6 +79,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 diff --git a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml index 4306d678e..1b82d4ef1 100644 --- a/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml +++ b/ChatQnA/docker_compose/intel/hpu/gaudi/compose_without_rerank.yaml @@ -40,6 +40,8 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} + HF_HUB_DISABLE_PROGRESS_BARS: 1 + HF_HUB_ENABLE_HF_TRANSFER: 0 HABANA_VISIBLE_DEVICES: all OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512