From 479d3934a5061f3055398661d7662f13e8165643 Mon Sep 17 00:00:00 2001 From: Travis Johnson Date: Tue, 20 Aug 2024 12:25:56 -0600 Subject: [PATCH] fix: enable logprobs during spec decoding by default Signed-off-by: Travis Johnson --- Dockerfile.ubi | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Dockerfile.ubi b/Dockerfile.ubi index e185ac549f513..7e6f612bc74bd 100644 --- a/Dockerfile.ubi +++ b/Dockerfile.ubi @@ -183,6 +183,11 @@ RUN --mount=type=cache,target=/root/.cache/pip \ ENV HF_HUB_OFFLINE=1 \ PORT=8000 \ HOME=/home/vllm \ + # As an optimization, vLLM disables logprobs when using spec decoding by + # default, but this would be unexpected to users of a hosted model that + # happens to have spec decoding + # see: https://github.com/vllm-project/vllm/pull/6485 + DISABLE_LOGPROBS_DURING_SPEC_DECODING=false \ # Allow requested max length to exceed what is extracted from the # config.json # see: https://github.com/vllm-project/vllm/pull/7080