From dac914b0d6bc36de4eb4bf70a9d20954560893ea Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Fri, 4 Oct 2024 21:45:38 -0700 Subject: [PATCH] [Bugfix] use blockmanagerv1 for encoder-decoder (#9084) Co-authored-by: Roger Wang --- vllm/engine/arg_utils.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index cae95d20ca23d..1623ebb3aa74c 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -903,6 +903,11 @@ def create_engine_config(self) -> EngineConfig: "--enable-prefix-caching is currently not " "supported for multimodal models and has been disabled.") self.enable_prefix_caching = False + if model_config.is_encoder_decoder_model: + logger.warning( + "Block Manager v2 does not support encoder-decoder models" + " currently. Using Block Manager v1 as fallback.") + self.use_v2_block_manager = False cache_config = CacheConfig( block_size=self.block_size if self.device != "neuron" else