From c36564f130900bf046529f3556347f920f4bea35 Mon Sep 17 00:00:00 2001
From: Bill Nell
Date: Tue, 8 Oct 2024 15:26:34 +0000
Subject: [PATCH] skip some tests if not supported

Guard the AWQ opcheck tests and the fused Marlin MoE AWQ test with
pytest.mark.skipif so they are skipped when the custom op they exercise
is not registered under torch.ops.

Every skipif marker carries an explicit reason string: pytest refuses a
boolean skipif condition that is given without reason=, which would turn
the guarded tests into errors instead of skips.

---
Note: the blob ids on the "index" lines were recorded before the
reason= arguments were added and do not describe the post-image below.

 tests/kernels/test_awq.py        | 5 +++++
 tests/kernels/test_awq_marlin.py | 4 ++++
 2 files changed, 9 insertions(+)

diff --git a/tests/kernels/test_awq.py b/tests/kernels/test_awq.py
index e421aca48af2..bff9c34da7b7 100644
--- a/tests/kernels/test_awq.py
+++ b/tests/kernels/test_awq.py
@@ -1,11 +1,14 @@
 import os
 
+import pytest
 import torch
 
 from tests.kernels.utils import opcheck
 from vllm import _custom_ops as ops  # noqa: F401
 
 
+@pytest.mark.skipif(not hasattr(torch.ops._C, "awq_dequantize"),
+                    reason="torch.ops._C.awq_dequantize is not available")
 def test_awq_dequantize_opcheck():
     os.environ["VLLM_USE_TRITON_AWQ"] = "0"
     qweight = torch.randint(-2000000000,
@@ -21,6 +24,8 @@ def test_awq_dequantize_opcheck():
             (qweight, scales, zeros, split_k_iters, thx, thy))
 
 
+@pytest.mark.skipif(not hasattr(torch.ops._C, "awq_gemm"),
+                    reason="torch.ops._C.awq_gemm is not available")
 def test_awq_gemm_opcheck():
     os.environ["VLLM_USE_TRITON_AWQ"] = "0"
     input = torch.rand((2, 8192), device='cuda', dtype=torch.float16)
diff --git a/tests/kernels/test_awq_marlin.py b/tests/kernels/test_awq_marlin.py
index 0738ea9b97ed..1c5221fc39f8 100644
--- a/tests/kernels/test_awq_marlin.py
+++ b/tests/kernels/test_awq_marlin.py
@@ -7,6 +7,7 @@
 
 from tests.kernels.utils import (compute_max_diff, stack_and_dev, torch_moe,
                                  torch_moe_single)
+from vllm import _custom_ops as ops
 from vllm.model_executor.layers.fused_moe.fused_marlin_moe import (
     fused_marlin_moe, single_marlin_moe)
 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk
@@ -21,6 +22,9 @@
 @pytest.mark.parametrize("e", [8, 64])
 @pytest.mark.parametrize("topk", [2, 6])
 @pytest.mark.parametrize("group_size", [-1, 32, 64, 128])
+@pytest.mark.skipif(not (ops.supports_moe_ops
+                         and hasattr(torch.ops._moe_C, "marlin_gemm_moe")),
+                    reason="torch.ops._moe_C.marlin_gemm_moe is not available")
 def test_fused_marlin_moe_awq(
     m: int,
     n: int,