From bf9f6acefe6449c02947668f38dc2700afa4c988 Mon Sep 17 00:00:00 2001 From: "bongwon.jang" Date: Fri, 2 Aug 2024 06:32:16 +0000 Subject: [PATCH] recommit --- vllm/spec_decode/draft_model_runner.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/spec_decode/draft_model_runner.py b/vllm/spec_decode/draft_model_runner.py index 88b7cd5148301..5168acdfd3dfa 100644 --- a/vllm/spec_decode/draft_model_runner.py +++ b/vllm/spec_decode/draft_model_runner.py @@ -10,7 +10,6 @@ # vllm_flash_attn is not installed, use the identical ROCm FA metadata from vllm.attention.backends.rocm_flash_attn import ( ROCmFlashAttentionMetadata as FlashAttentionMetadata) - try: from flashinfer import BatchDecodeWithPagedKVCacheWrapper from flashinfer.decode import CUDAGraphBatchDecodeWithPagedKVCacheWrapper