
Commit

[cuda][misc] remove error_on_invalid_device_count_status (vllm-projec…
youkaichao authored and kylesayrs committed Aug 17, 2024
1 parent 95119ad commit fbcb286
Showing 3 changed files with 3 additions and 32 deletions.
3 changes: 0 additions & 3 deletions vllm/executor/multiproc_gpu_executor.py
@@ -17,7 +17,6 @@
 from vllm.sequence import ExecuteModelRequest, SamplerOutput
 from vllm.triton_utils import maybe_set_triton_cache_manager
 from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless,
-                        error_on_invalid_device_count_status,
                         get_distributed_init_method, get_open_port,
                         get_vllm_instance_id, make_async,
                         update_environment_variables)
@@ -79,8 +78,6 @@ def _init_executor(self) -> None:
             f"please ensure that world_size ({world_size}) "
             f"is less than than max local gpu count ({cuda_device_count})")
 
-        error_on_invalid_device_count_status()
-
         # Multiprocessing-based executor does not support multi-node setting.
        # Since it only works for single node, we can use the loopback address
         # 127.0.0.1 for communication.
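The context lines above show the check the multiprocessing executor keeps: the requested world_size must not exceed the locally visible GPU count returned by cuda_device_count_stateless, which is still imported from vllm.utils in this diff. Below is a minimal sketch of that validation; the function name is hypothetical and the wording is illustrative rather than the exact executor code.

    # Illustrative sketch only: validate world_size against the locally
    # visible GPU count using the stateless helper the executor still imports.
    from vllm.utils import cuda_device_count_stateless


    def ensure_enough_local_gpus(world_size: int) -> None:
        cuda_device_count = cuda_device_count_stateless()
        if world_size > cuda_device_count:
            raise RuntimeError(
                f"please ensure that world_size ({world_size}) is less than "
                f"max local gpu count ({cuda_device_count})")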
9 changes: 3 additions & 6 deletions vllm/executor/ray_gpu_executor.py
@@ -10,10 +10,9 @@
 from vllm.executor.ray_utils import RayWorkerWrapper, ray
 from vllm.logger import init_logger
 from vllm.sequence import ExecuteModelRequest, SamplerOutput
-from vllm.utils import (_run_task_with_lock,
-                        error_on_invalid_device_count_status,
-                        get_distributed_init_method, get_ip, get_open_port,
-                        get_vllm_instance_id, make_async)
+from vllm.utils import (_run_task_with_lock, get_distributed_init_method,
+                        get_ip, get_open_port, get_vllm_instance_id,
+                        make_async)
 
 if ray is not None:
     from ray.util.scheduling_strategies import PlacementGroupSchedulingStrategy
@@ -216,8 +215,6 @@ def _init_workers_ray(self, placement_group: "PlacementGroup",
         distributed_init_method = get_distributed_init_method(
             driver_ip, get_open_port())
 
-        error_on_invalid_device_count_status()
-
         # Initialize the actual workers inside worker wrapper.
         init_worker_all_kwargs = [
             self._get_worker_kwargs(
23 changes: 0 additions & 23 deletions vllm/utils.py
@@ -1,6 +1,5 @@
 import argparse
 import asyncio
-import contextlib
 import datetime
 import enum
 import gc
@@ -923,28 +922,6 @@ def cuda_device_count_stateless() -> int:
     return _cuda_device_count_stateless(envs.CUDA_VISIBLE_DEVICES)
 
 
-def error_on_invalid_device_count_status():
-    cache_entries = 0
-    with contextlib.suppress(Exception):
-        # future pytorch will fix the issue, device_count will not be cached
-        # at that time, `.cache_info().currsize` will error out
-        cache_entries = torch.cuda.device_count.cache_info(  # type: ignore
-        ).currsize
-    if cache_entries != 0:
-        # the function is already called, and the result is cached
-        remembered = torch.cuda.device_count()
-        current = cuda_device_count_stateless()
-        if remembered > current:
-            raise RuntimeError(
-                "The number of CUDA devices has changed since the first "
-                "call to torch.cuda.device_count(). This is not allowed "
-                "and may result in undefined behavior. Please check out "
-                "https://github.com/vllm-project/vllm/issues/6056 to "
-                "find the first call to torch.cuda.device_count() "
-                "and defer it until the engine is up. Or you can set "
-                "CUDA_VISIBLE_DEVICES to the GPUs you want to use.")
-
-
 # NVML utils
 # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`,
 # all the related functions work on real physical device ids.
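For reference, the removed helper compared torch.cuda.device_count(), whose result can be cached by some PyTorch versions, against the cuda_device_count_stateless() helper that remains in vllm/utils.py. Below is a minimal sketch of that stateless idea: key a cached count on the current CUDA_VISIBLE_DEVICES value so that changing the variable yields a fresh count. The names and the fallback behavior are simplified assumptions, not the actual vLLM implementation.

    # Illustrative sketch only: a device count keyed on CUDA_VISIBLE_DEVICES,
    # so a changed environment variable is never masked by an earlier cached value.
    import os
    from functools import lru_cache
    from typing import Optional

    import torch


    @lru_cache
    def _count_for(cuda_visible_devices: Optional[str]) -> int:
        # Simplification: when CUDA_VISIBLE_DEVICES is set, count its entries;
        # otherwise fall back to torch (which may cache its own result).
        if cuda_visible_devices:
            return len([d for d in cuda_visible_devices.split(",") if d])
        return torch.cuda.device_count()


    def device_count_stateless() -> int:
        # Re-reading the environment variable on every call is what keeps the
        # result consistent with the current CUDA_VISIBLE_DEVICES setting.
        return _count_for(os.environ.get("CUDA_VISIBLE_DEVICES"))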
