Skip to content

Commit 9fc3417

Browse files
committed
Remove unnecessary env override since it is already set by the pipeline
1 parent 0ff27f2 commit 9fc3417

File tree

3 files changed

+5
-29
lines changed

3 files changed

+5
-29
lines changed

.buildkite/test-pipeline.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ steps:
44 44
working_dir: "/vllm-workspace/tests"
45 45
num_gpus: 2
46 46
commands:
47+
- bash ../.buildkite/download-images.sh
47 48
# FIXIT: find out which code initialize cuda before running the test
48 49
# before the fix, we need to use spawn to test it
49-
- bash ../.buildkite/download-images.sh
50 50
- export VLLM_WORKER_MULTIPROC_METHOD=spawn
51 51
- VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py
52 52
- TEST_DIST_MODEL=facebook/opt-125m DISTRIBUTED_EXECUTOR_BACKEND=ray pytest -v -s distributed/test_basic_distributed_correctness.py

tests/distributed/test_multimodal_broadcast.py

+4-15
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
17 17

18 18
from vllm.utils import cuda_device_count_stateless
19 19

20-
from ..utils import override_env
21-
22 20
model = os.environ["TEST_DIST_MODEL"]
23 21

24 22
if model.startswith("llava-hf/llava"):
@@ -29,25 +27,16 @@
29 27
raise NotImplementedError(f"Unsupported model: {model}")
30 28

31 29

32-
@pytest.fixture(scope="function", autouse=True)
33-
def tensor_parallel_ctx(tensor_parallel_size: int):
34-
if cuda_device_count_stateless() < tensor_parallel_size:
35-
pytest.skip(
36-
f"Need at least {tensor_parallel_size} GPUs to run the test.")
37-
38-
if tensor_parallel_size > 1:
39-
with override_env("VLLM_WORKER_MULTIPROC_METHOD", "spawn"):
40-
yield
41-
else:
42-
yield
43-
44-
45 30
@pytest.mark.parametrize("tensor_parallel_size", [2])
46 31
@pytest.mark.parametrize("dtype", ["half"])
47 32
@pytest.mark.parametrize("max_tokens", [128])
48 33
def test_models(hf_runner, vllm_runner, image_assets,
49 34
tensor_parallel_size: int, dtype: str,
50 35
max_tokens: int) -> None:
36+
if cuda_device_count_stateless() < tensor_parallel_size:
37+
pytest.skip(
38+
f"Need at least {tensor_parallel_size} GPUs to run the test.")
39+
51 40
distributed_executor_backend = os.getenv("DISTRIBUTED_EXECUTOR_BACKEND")
52 41

53 42
run_test(

tests/utils.py

-13
Original file line numberDiff line numberDiff line change
@@ -219,16 +219,3 @@ def wait_for_gpu_memory_to_clear(devices: List[int],
219 219
f'{dur_s=:.02f} ({threshold_bytes/2**30=})')
220 220

221 221
time.sleep(5)
222-
223-
224-
@contextmanager
225-
def override_env(name: str, value: str):
226-
prev_value = os.environ.get(name)
227-
os.environ[name] = value
228-
229-
yield
230-
231-
if prev_value is None:
232-
del os.environ[name]
233-
else:
234-
os.environ[name] = prev_value

0 commit comments

Comments (0)