From 66984d4b277b118c61f171cf2871b7afbc39de5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?=
Date: Wed, 11 Sep 2024 12:33:01 +0200
Subject: [PATCH] add vllm-tgis-adapter layer

---
Notes (this area is ignored when the patch is applied):
* Appends a `vllm-grpc-adapter` stage built on the `vllm-openai` stage. It
  installs vllm-tgis-adapter 0.4.0 as root, drops back to UID 2000, and makes
  `python3 -m vllm_tgis_adapter` the entrypoint.
* The stage keyword is written `AS` so its casing matches `FROM`
  (BuildKit `FromAsCasing` build check); the stage name is unchanged.

 Dockerfile.rocm.ubi | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Dockerfile.rocm.ubi b/Dockerfile.rocm.ubi
index 05511e4a80e7..b9010c6afa21 100644
--- a/Dockerfile.rocm.ubi
+++ b/Dockerfile.rocm.ubi
@@ -236,3 +236,22 @@ USER 2000
 
 # Set the entrypoint
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
+
+FROM vllm-openai AS vllm-grpc-adapter
+
+USER root
+
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install vllm-tgis-adapter==0.4.0
+
+ENV GRPC_PORT=8033 \
+    PORT=8000 \
+    # As an optimization, vLLM disables logprobs when using spec decoding by
+    # default, but this would be unexpected to users of a hosted model that
+    # happens to have spec decoding
+    # see: https://github.com/vllm-project/vllm/pull/6485
+    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
+
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]