From 66984d4b277b118c61f171cf2871b7afbc39de5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniele=20Trifir=C3=B2?= <dtrifiro@redhat.com>
Date: Wed, 11 Sep 2024 12:33:01 +0200
Subject: [PATCH] add vllm-tgis-adapter layer

---
 Dockerfile.rocm.ubi | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Dockerfile.rocm.ubi b/Dockerfile.rocm.ubi
index 05511e4a80e7..b9010c6afa21 100644
--- a/Dockerfile.rocm.ubi
+++ b/Dockerfile.rocm.ubi
@@ -236,3 +236,22 @@ USER 2000
 
 # Set the entrypoint
 ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
+
+# Adapter stage: the vllm-openai stage plus the pinned vllm-tgis-adapter package
+FROM vllm-openai AS vllm-grpc-adapter
+
+USER root
+# Install as root; pip's download cache lives in a build-time cache mount, not in the image
+RUN --mount=type=cache,target=/root/.cache/pip \
+    pip install vllm-tgis-adapter==0.4.0
+
+ENV GRPC_PORT=8033 \
+    PORT=8000 \
+    # By default vLLM disables logprobs when speculative decoding is in use (an
+    # optimization). Users of a hosted model that happens to have spec decoding
+    # enabled would not expect that, so the flag is explicitly set to false.
+    # see: https://github.com/vllm-project/vllm/pull/6485
+    DISABLE_LOGPROBS_DURING_SPEC_DECODING=false
+
+USER 2000
+ENTRYPOINT ["python3", "-m", "vllm_tgis_adapter", "--uvicorn-log-level=warning"]