diff --git a/ChatQnA/docker/gaudi/README.md b/ChatQnA/docker/gaudi/README.md index 2b49a33f4..f2a34ffc1 100644 --- a/ChatQnA/docker/gaudi/README.md +++ b/ChatQnA/docker/gaudi/README.md @@ -218,8 +218,13 @@ export BACKEND_SERVICE_ENDPOINT="http://${host_ip}:8888/v1/chatqna" export DATAPREP_SERVICE_ENDPOINT="http://${host_ip}:6007/v1/dataprep" export DATAPREP_GET_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/get_file" export DATAPREP_DELETE_FILE_ENDPOINT="http://${host_ip}:6007/v1/dataprep/delete_file" + +export llm_service_devices=all +export tei_embedding_devices=all ``` +To specify the device ids, `llm_service_devices` and `tei_embedding_devices` can be set to a comma-separated list such as `0,1,2,3`. More info in the [Gaudi docs](https://docs.habana.ai/en/latest/Orchestration/Multiple_Tenants_on_HPU/Multiple_Dockers_each_with_Single_Workload.html). + If guardrails microservice is enabled in the pipeline, the below environment variables are necessary to be set. ```bash @@ -229,7 +234,7 @@ export SAFETY_GUARD_ENDPOINT="http://${host_ip}:8088" export GUARDRAIL_SERVICE_HOST_IP=${host_ip} ``` -Note: Please replace with `host_ip` with you external IP address, do **NOT** use localhost. +Note: Please replace `host_ip` with your external IP address, do **NOT** use localhost. 
### Start all the services Docker Containers diff --git a/ChatQnA/docker/gaudi/compose.yaml b/ChatQnA/docker/gaudi/compose.yaml index e5327b623..6689efc6f 100644 --- a/ChatQnA/docker/gaudi/compose.yaml +++ b/ChatQnA/docker/gaudi/compose.yaml @@ -39,7 +39,7 @@ services: no_proxy: ${no_proxy} http_proxy: ${http_proxy} https_proxy: ${https_proxy} - HABANA_VISIBLE_DEVICES: all + HABANA_VISIBLE_DEVICES: ${tei_embedding_devices} OMPI_MCA_btl_vader_single_copy_mechanism: none MAX_WARMUP_SEQUENCE_LENGTH: 512 INIT_HCCL_ON_ACQUIRE: 0 @@ -121,7 +121,7 @@ services: HF_TOKEN: ${HUGGINGFACEHUB_API_TOKEN} HF_HUB_DISABLE_PROGRESS_BARS: 1 HF_HUB_ENABLE_HF_TRANSFER: 0 - HABANA_VISIBLE_DEVICES: all + HABANA_VISIBLE_DEVICES: ${llm_service_devices} OMPI_MCA_btl_vader_single_copy_mechanism: none runtime: habana cap_add: