Skip to content

Commit

Permalink
[Model] Fix Baichuan BNB online quantization (vllm-project#10572)
Browse files Browse the repository at this point in the history
Signed-off-by: Chen Wu <cntryroa@gmail.com>
  • Loading branch information
CNTRYROA authored and weilong.yu committed Dec 13, 2024
1 parent eedb2ba commit 7d5171c
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions vllm/model_executor/models/baichuan.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,21 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
embedding_modules = {}
embedding_padding_modules = []

# BitsAndBytes-specific attributes.
# Linear submodules to be quantized when loading with BNB online
# quantization. NOTE: fix over the original — ".up_proj." was listed
# twice (copy-paste duplicate); each target module appears once now.
default_bitsandbytes_target_modules = [
    ".W_pack.",
    ".o_proj.",
    ".down_proj.",
    ".up_proj.",
    ".gate_proj.",
]
# Maps each per-shard checkpoint weight name to the fused parameter it
# is packed into, and its shard index within that fused parameter.
bitsandbytes_stacked_params_mapping = {
    # shard_name: (weight_name, index)
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}

def __init__(
self,
*,
Expand Down

0 comments on commit 7d5171c

Please sign in to comment.