diff --git a/intel_extension_for_transformers/llm/quantization/utils.py b/intel_extension_for_transformers/llm/quantization/utils.py
index f07fcce258d..e72695a6d39 100644
--- a/intel_extension_for_transformers/llm/quantization/utils.py
+++ b/intel_extension_for_transformers/llm/quantization/utils.py
@@ -24,15 +24,16 @@
 from datasets import load_dataset
 from neural_compressor import quantization
 from neural_compressor.adaptor.torch_utils.model_wrapper import WeightOnlyLinear
-from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear as auto_round_woqlinear
 from neural_compressor.utils.utility import LazyImport
 from neural_compressor.config import PostTrainingQuantConfig
-from ...utils.utils import is_ipex_available
+from ...utils.utils import is_ipex_available, is_autoround_available
 from transformers import AutoTokenizer
 
 
 if is_ipex_available():
     import intel_extension_for_pytorch as ipex
+if is_autoround_available():
+    from auto_round.export.export_to_itrex.model_wrapper import WeightOnlyLinear as auto_round_woqlinear  # pylint: disable=E0401
 
 torch = LazyImport("torch")
 
@@ -106,7 +107,7 @@ def _replace_linear(
 
         is_removed = False
         if (isinstance(module, torch.nn.Linear) or isinstance(module, WeightOnlyLinear)
-            or isinstance(module, auto_round_woqlinear) or (is_ipex_available()
+            or (is_autoround_available() and isinstance(module, auto_round_woqlinear)) or (is_ipex_available()
             and isinstance(module, ipex.nn.utils._weight_prepack._IPEXLinear))) \
             and (name not in modules_to_not_convert):
             # Check if the current key is not in the `modules_to_not_convert`
diff --git a/intel_extension_for_transformers/utils/utils.py b/intel_extension_for_transformers/utils/utils.py
index 7e7fd91ebca..43a89efcdc3 100644
--- a/intel_extension_for_transformers/utils/utils.py
+++ b/intel_extension_for_transformers/utils/utils.py
@@ -72,3 +72,14 @@ def get_gpu_family():
 
 def is_ipex_available():
     return _ipex_available
+
+_autoround_available = importlib.util.find_spec("auto_round") is not None
+_autoround_version = "N/A"
+if _autoround_available:
+    try:
+        _autoround_version = importlib_metadata.version("auto_round")
+    except importlib_metadata.PackageNotFoundError:
+        _autoround_available = False
+
+def is_autoround_available():
+    return _autoround_available
diff --git a/requirements.txt b/requirements.txt
index 08d44d83d64..c1fe02b9475 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,6 @@
 accelerate
 cmake>=3.16
 gguf
-git+https://github.com/intel/auto-round.git@b65830f3f6cb32d92a5c8ba5f80ace12d517357b
 ninja
 optimum-intel
 py-cpuinfo