From 2ee45281a5012072f41573eb09e1f82985adc761 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 5 Sep 2024 11:09:46 -0400 Subject: [PATCH] Move verify_marlin_supported to GPTQMarlinLinearMethod (#8165) --- vllm/model_executor/layers/quantization/gptq_marlin.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 94eb3f301541a..b06ff7bd2bace 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -51,10 +51,6 @@ def __init__(self, weight_bits: int, group_size: int, desc_act: bool, self.quant_type = self.TYPE_MAP[(weight_bits, is_sym)] - # Verify supported on platform. - verify_marlin_supported(quant_type=self.quant_type, - group_size=self.group_size) - def __repr__(self) -> str: return (f"GPTQMarlinConfig(quant_type={self.quant_type}, " f"group_size={self.group_size}, " @@ -153,6 +149,10 @@ class GPTQMarlinLinearMethod(LinearMethodBase): def __init__(self, quant_config: GPTQMarlinConfig) -> None: self.quant_config = quant_config + # Verify supported on platform. + verify_marlin_supported(quant_type=self.quant_config.quant_type, + group_size=self.quant_config.group_size) + def create_weights( self, layer: torch.nn.Module,