diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 633bc5ca95bf..5b4a786305e1 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -220,7 +220,6 @@ steps:
 
 - label: Tensorizer Test
   #mirror_hardwares: [amd]
-  soft_fail: true
   fast_check: true
   commands:
     - apt-get install -y curl libsodium23
diff --git a/tests/tensorizer_loader/conftest.py b/tests/tensorizer_loader/conftest.py
new file mode 100644
index 000000000000..c5c6fc1057d3
--- /dev/null
+++ b/tests/tensorizer_loader/conftest.py
@@ -0,0 +1,45 @@
+# isort: skip_file
+
+import contextlib
+import gc
+
+import pytest
+import ray
+import torch
+
+from vllm.distributed import (destroy_distributed_environment,
+                              destroy_model_parallel)
+from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
+
+
+def cleanup():
+    destroy_model_parallel()
+    destroy_distributed_environment()
+    with contextlib.suppress(AssertionError):
+        torch.distributed.destroy_process_group()
+    gc.collect()
+    torch.cuda.empty_cache()
+    ray.shutdown()
+
+
+@pytest.fixture()
+def should_do_global_cleanup_after_test(request) -> bool:
+    """Allow subdirectories to skip global cleanup by overriding this fixture.
+    This can provide a ~10x speedup for non-GPU unit tests since they don't need
+    to initialize torch.
+    """
+
+    return True
+
+
+@pytest.fixture(autouse=True)
+def cleanup_fixture(should_do_global_cleanup_after_test: bool):
+    yield
+    if should_do_global_cleanup_after_test:
+        cleanup()
+
+
+@pytest.fixture(autouse=True)
+def tensorizer_config():
+    config = TensorizerConfig(tensorizer_uri="vllm")
+    return config
\ No newline at end of file
diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/tensorizer_loader/test_tensorizer.py
index b7030e3cd6d4..2adeae8874bd 100644
--- a/tests/tensorizer_loader/test_tensorizer.py
+++ b/tests/tensorizer_loader/test_tensorizer.py
@@ -40,7 +40,6 @@
 tensorize_model_for_testing_script = os.path.join(
     os.path.dirname(__file__), "tensorize_vllm_model_for_testing.py")
 
-
 def is_curl_installed():
     try:
         subprocess.check_call(['curl', '--version'])
@@ -63,10 +62,6 @@ def write_keyfile(keyfile_path: str):
     with open(keyfile_path, 'wb') as f:
         f.write(encryption_params.key)
 
-@pytest.fixture(autouse=True)
-def tensorizer_config():
-    config = TensorizerConfig(tensorizer_uri="vllm")
-    return config
 
 
 @patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
@@ -105,6 +100,7 @@ def test_can_deserialize_s3(vllm_runner):
 @pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
 def test_deserialized_encrypted_vllm_model_has_same_outputs(
         vllm_runner, tmp_path):
+    cleanup()
     with vllm_runner(model_ref) as vllm_model:
         model_path = tmp_path / (model_ref + ".tensors")
         key_path = tmp_path / (model_ref + ".key")
@@ -316,6 +312,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
 
 
 def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
+    cleanup()
    model_ref = "facebook/opt-125m"
     model_path = tmp_path / (model_ref + ".tensors")
     config = TensorizerConfig(tensorizer_uri=str(model_path))
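
A quick sketch, not part of the patch, of how an individual test module could opt out of the global teardown that the new conftest.py installs: pytest resolves same-named fixtures at the closest scope, so redefining should_do_global_cleanup_after_test in a module shadows the conftest default. The module name and test below are hypothetical.

# tests/tensorizer_loader/test_fast_units.py (hypothetical module)
import pytest


@pytest.fixture()
def should_do_global_cleanup_after_test() -> bool:
    # Shadows the conftest.py fixture of the same name; the autouse
    # cleanup_fixture then skips cleanup() after each test in this module.
    return False


def test_pure_python_logic():
    # Runs without triggering the torch/ray teardown between tests.
    assert 1 + 1 == 2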