Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Bugfix]: Fix Tensorizer test failures #6835

Merged
merged 6 commits into from
Jul 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .buildkite/test-pipeline.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,6 @@ steps:

- label: Tensorizer Test
#mirror_hardwares: [amd]
soft_fail: true
fast_check: true
commands:
- apt-get install -y curl libsodium23
Expand Down
45 changes: 45 additions & 0 deletions tests/tensorizer_loader/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# isort: skip_file

import contextlib
import gc

import pytest
import ray
import torch

from vllm.distributed import (destroy_distributed_environment,
destroy_model_parallel)
from vllm.model_executor.model_loader.tensorizer import TensorizerConfig


def cleanup():
destroy_model_parallel()
destroy_distributed_environment()
with contextlib.suppress(AssertionError):
torch.distributed.destroy_process_group()
gc.collect()
torch.cuda.empty_cache()
ray.shutdown()


@pytest.fixture()
def should_do_global_cleanup_after_test(request) -> bool:
"""Allow subdirectories to skip global cleanup by overriding this fixture.
This can provide a ~10x speedup for non-GPU unit tests since they don't need
to initialize torch.
"""

return True


@pytest.fixture(autouse=True)
def cleanup_fixture(should_do_global_cleanup_after_test: bool):
yield
if should_do_global_cleanup_after_test:
cleanup()


@pytest.fixture(autouse=True)
def tensorizer_config():
config = TensorizerConfig(tensorizer_uri="vllm")
return config
7 changes: 2 additions & 5 deletions tests/tensorizer_loader/test_tensorizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
tensorize_model_for_testing_script = os.path.join(
os.path.dirname(__file__), "tensorize_vllm_model_for_testing.py")


def is_curl_installed():
try:
subprocess.check_call(['curl', '--version'])
Expand All @@ -63,10 +62,6 @@ def write_keyfile(keyfile_path: str):
with open(keyfile_path, 'wb') as f:
f.write(encryption_params.key)

@pytest.fixture(autouse=True)
def tensorizer_config():
config = TensorizerConfig(tensorizer_uri="vllm")
return config


@patch('vllm.model_executor.model_loader.tensorizer.TensorizerAgent')
Expand Down Expand Up @@ -105,6 +100,7 @@ def test_can_deserialize_s3(vllm_runner):
@pytest.mark.skipif(not is_curl_installed(), reason="cURL is not installed")
def test_deserialized_encrypted_vllm_model_has_same_outputs(
vllm_runner, tmp_path):
cleanup()
with vllm_runner(model_ref) as vllm_model:
model_path = tmp_path / (model_ref + ".tensors")
key_path = tmp_path / (model_ref + ".key")
Expand Down Expand Up @@ -316,6 +312,7 @@ def test_deserialized_encrypted_vllm_model_with_tp_has_same_outputs(vllm_runner,


def test_vllm_tensorized_model_has_same_outputs(vllm_runner, tmp_path):
cleanup()
model_ref = "facebook/opt-125m"
model_path = tmp_path / (model_ref + ".tensors")
config = TensorizerConfig(tensorizer_uri=str(model_path))
Expand Down
Loading