diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py
index 4cf3b0b93dcf5..d50f4fb9e6ed4 100644
--- a/vllm/model_executor/models/fuyu.py
+++ b/vllm/model_executor/models/fuyu.py
@@ -229,7 +229,7 @@ def __init__(self,
         self.multimodal_config = multimodal_config
 
         self.padding_idx = config.pad_token_id
-        self.vocab_size = config.vocab_size
+        self.vocab_size = config.text_config.vocab_size
         self.image_token_id = _IMAGE_TOKEN_ID
         self.image_feature_size = config.patch_size**2 * config.num_channels
 
diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py
index 68b6d0cf808e1..8130eb54753ea 100644
--- a/vllm/model_executor/models/paligemma.py
+++ b/vllm/model_executor/models/paligemma.py
@@ -152,7 +152,8 @@ def __init__(self,
         self.unpadded_vocab_size = config.text_config.vocab_size
         logit_scale = getattr(config, "logit_scale", 1.0)
         self.logits_processor = LogitsProcessor(self.unpadded_vocab_size,
-                                                config.vocab_size, logit_scale)
+                                                config.text_config.vocab_size,
+                                                logit_scale)
         self.sampler = Sampler()
 
     def _validate_pixel_values(self, data: torch.Tensor) -> torch.Tensor:
diff --git a/vllm/model_executor/models/persimmon.py b/vllm/model_executor/models/persimmon.py
index f8fc1cd8ef1f0..ced846cbe3358 100644
--- a/vllm/model_executor/models/persimmon.py
+++ b/vllm/model_executor/models/persimmon.py
@@ -213,10 +213,10 @@ def __init__(self,
                  cache_config: Optional[CacheConfig] = None,
                  quant_config: Optional[QuantizationConfig] = None):
         super().__init__()
-        self.vocab_size = config.vocab_size
+        self.vocab_size = config.text_config.vocab_size
 
-        self.embed_tokens = VocabParallelEmbedding(config.vocab_size,
-                                                   config.hidden_size)
+        self.embed_tokens = VocabParallelEmbedding(
+            config.text_config.vocab_size, config.hidden_size)
         self.layers = nn.ModuleList([
             PersimmonDecoderLayer(config,
                                   cache_config=cache_config,
@@ -257,14 +257,14 @@ def __init__(self,
                  quant_config: Optional[QuantizationConfig] = None):
         super().__init__()
         self.config = config
-        self.vocab_size = config.vocab_size
+        self.vocab_size = config.text_config.vocab_size
         self.model = PersimmonModel(config,
                                     cache_config=cache_config,
                                     quant_config=quant_config)
-        self.lm_head = ParallelLMHead(config.vocab_size,
+        self.lm_head = ParallelLMHead(config.text_config.vocab_size,
                                       config.hidden_size,
                                       bias=False)
-        self.logits_processor = LogitsProcessor(config.vocab_size)
+        self.logits_processor = LogitsProcessor(config.text_config.vocab_size)
         self.sampler = Sampler()
 
     def forward(