vllm-project · youkaichao · Jul 27, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
@@ -220,7 +220,6 @@ steps:
 
 - label: Tensorizer Test
   #mirror_hardwares: [amd]
-  soft_fail: true
   fast_check: true
   commands:
     - apt-get install -y curl libsodium23

diff --git a/tests/tensorizer_loader/conftest.py b/tests/tensorizer_loader/conftest.py
@@ -0,0 +1,45 @@
+# isort: skip_file
+
+import contextlib
+import gc
+
+import pytest
+import ray
+import torch
+
+from vllm.distributed import (destroy_distributed_environment,
+                              destroy_model_parallel)
+from vllm.model_executor.model_loader.tensorizer import TensorizerConfig
+
+
+def cleanup():
+    destroy_model_parallel()
+    destroy_distributed_environment()
+    with contextlib.suppress(AssertionError):
+        torch.distributed.destroy_process_group()
+    gc.collect()
+    torch.cuda.empty_cache()
+    ray.shutdown()
+
+
+@pytest.fixture()
+def should_do_global_cleanup_after_test(request) -> bool:
+    """Allow subdirectories to skip global cleanup by overriding this fixture.
+    This can provide a ~10x speedup for non-GPU unit tests since they don't need
+    to initialize torch.
+    """
+
+    return True
+
+
+@pytest.fixture(autouse=True)
+def cleanup_fixture(should_do_global_cleanup_after_test: bool):
+    yield
+    if should_do_global_cleanup_after_test:
+        cleanup()
+
+
+@pytest.fixture(autouse=True)
+def tensorizer_config():
+    config = TensorizerConfig(tensorizer_uri="vllm")
+    return config
diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/tensorizer_loader/test_tensorizer.py
@@ -40,7 +40,6 @@
 tensorize_model_for_testing_script = os.path.join(
     os.path.dirname(__file__), "tensorize_vllm_model_for_testing.py")
 
-
 def is_curl_installed():
     try:
         subprocess.check_call(['curl', '--version'])
@@ -63,10 +62,6 @@ def write_keyfile(keyfile_path: str):
     with open(keyfile_path, 'wb') as f:
         f.write(encryption_params.key)
 
-@pytest.fixture(autouse=True)
-def tensorizer_config():
-    config = TensorizerConfig(tensorizer_uri="vllm")
-    return config
 
 
 @patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
@@ -105,6 +100,7 @@ def test_can_deserialize_s3(vllm_runner):
 @pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
 def test_deserialized_encrypted_vllm_model_has_same_outputs(
         vllm_runner, tmp_path):
+    cleanup()
     with vllm_runner(model_ref) as vllm_model:
         model_path = tmp_path / (model_ref + ".tensors")
         key_path = tmp_path / (model_ref + ".key")
@@ -316,6 +312,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,
 
 
 def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
+    cleanup()
     model_ref = "facebook/opt-125m"
     model_path = tmp_path / (model_ref + ".tensors")
     config = TensorizerConfig(tensorizer_uri=str(model_path))