From 5fcc1c40d13711d0a1f29c2c632636a60b662c9e Mon Sep 17 00:00:00 2001
From: root <rirv938@gmail.com>
Date: Thu, 24 Aug 2023 11:35:40 +0100
Subject: [PATCH] Defer missing AWQ kernel ImportError from import time to __init__

---
 vllm/model_executor/layers/quant.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/quant.py b/vllm/model_executor/layers/quant.py
index 989f257aa3bea..f1c14eb27de31 100644
--- a/vllm/model_executor/layers/quant.py
+++ b/vllm/model_executor/layers/quant.py
@@ -5,11 +5,10 @@
 
 
 try:
-    import awq_inference_engine  # with CUDA kernels
+    import awq_inference_engine
+    KERNELS_INSTALLED = True
 except ImportError as ex:
-    raise ImportError(
-        "Unable to import awq_inference_engine: run setup.py"
-        " to install AWQ CUDA kernels")
+    KERNELS_INSTALLED = False
 
 
 class ScaledActivation(nn.Module):
@@ -34,6 +33,11 @@ def __init__(
         ):
         super().__init__()
 
+        if not KERNELS_INSTALLED:
+            raise ImportError(
+                "Unable to import awq_inference_engine: run setup.py"
+                " to install AWQ CUDA kernels")
+
         if w_bit not in [4]:
             raise NotImplementedError("Only 4-bit are supported for now.")