Skip to content

Commit

Permalink
[Model] Fix Baichuan BNB online quantization (vllm-project#10572)
Browse files Browse the repository at this point in the history
Signed-off-by: Chen Wu <cntryroa@gmail.com>
  • Loading branch information
CNTRYROA authored and weilong.yu committed Dec 13, 2024
1 parent eedb2ba commit 7d5171c
Showing 1 changed file with 15 additions and 0 deletions.
15 changes: 15 additions & 0 deletions vllm/model_executor/models/baichuan.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,21 @@ class BaiChuanBaseForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
embedding_modules = {}
embedding_padding_modules = []

# BitsAndBytes-specific attributes.
# Linear submodules to be quantized when loading with BNB online
# quantization. NOTE: fix over the original — ".up_proj." was listed
# twice (copy-paste duplicate); each target module appears once now.
default_bitsandbytes_target_modules = [
    ".W_pack.",
    ".o_proj.",
    ".down_proj.",
    ".up_proj.",
    ".gate_proj.",
]
# Maps each per-shard checkpoint weight name to the fused parameter it
# is packed into, and its shard index within that fused parameter.
bitsandbytes_stacked_params_mapping = {
    # shard_name: (weight_name, index)
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}

def __init__(
self,
*,
Expand Down

0 comments on commit 7d5171c

Please sign in to comment.