From 5fcc1c40d13711d0a1f29c2c632636a60b662c9e Mon Sep 17 00:00:00 2001 From: root Date: Thu, 24 Aug 2023 11:35:40 +0100 Subject: [PATCH] update --- vllm/model_executor/layers/quant.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quant.py b/vllm/model_executor/layers/quant.py index 989f257aa3bea..f1c14eb27de31 100644 --- a/vllm/model_executor/layers/quant.py +++ b/vllm/model_executor/layers/quant.py @@ -5,11 +5,10 @@ try: - import awq_inference_engine # with CUDA kernels + import awq_inference_engine + KERNELS_INSTALLED = True except ImportError as ex: - raise ImportError( - "Unable to import awq_inference_engine: run setup.py" - " to install AWQ CUDA kernels") + KERNELS_INSTALLED = False class ScaledActivation(nn.Module): @@ -34,6 +33,11 @@ def __init__( ): super().__init__() + if not KERNELS_INSTALLED: + raise ImportError( + "Unable to import awq_ext: run setup.py" + " to install AWQ CUDA kernels") + if w_bit not in [4]: raise NotImplementedError("Only 4-bit are supported for now.")