diff --git a/rllib/algorithms/algorithm_config.py b/rllib/algorithms/algorithm_config.py
index 8ef9d6de5cf28..afd46debfbbd2 100644
--- a/rllib/algorithms/algorithm_config.py
+++ b/rllib/algorithms/algorithm_config.py
@@ -357,6 +357,7 @@ def __init__(self, algo_class: Optional[type] = None):
         self.num_learners = 0
         self.num_gpus_per_learner = 0
         self.num_cpus_per_learner = "auto"
+        self.custom_resources_per_learner = {}
         self.num_aggregator_actors_per_learner = 0
         self.max_requests_in_flight_per_aggregator_actor = 3
         self.local_gpu_idx = 0
@@ -2138,6 +2139,9 @@ def learners(
         num_learners: Optional[int] = NotProvided,
         num_cpus_per_learner: Optional[Union[str, float, int]] = NotProvided,
         num_gpus_per_learner: Optional[Union[float, int]] = NotProvided,
+        custom_resources_per_learner: Optional[
+            Dict[str, Union[float, int]]
+        ] = NotProvided,
         num_aggregator_actors_per_learner: Optional[int] = NotProvided,
         max_requests_in_flight_per_aggregator_actor: Optional[float] = NotProvided,
         local_gpu_idx: Optional[int] = NotProvided,
@@ -2164,6 +2168,11 @@ def learners(
                 `num_learners=0`, any value greater than 0 runs the training on a
                 single GPU on the main process, while a value of 0 runs the training on
                 main process CPUs.
+            custom_resources_per_learner: A dict that specifies custom resources allocated
+                per Learner worker. Similar to the GPU, if you declare a certain number
+                for NPU/HPU (which is already supported in Ray Train) greater than 0,
+                such as {"NPU": 1}, the training will run on the corresponding
+                accelerator.
             num_aggregator_actors_per_learner: The number of aggregator actors per
                 Learner (if num_learners=0, one local learner is created). Must be at
                 least 1. Aggregator actors perform the task of a) converting episodes
@@ -2196,6 +2205,8 @@ def learners(
             self.num_cpus_per_learner = num_cpus_per_learner
         if num_gpus_per_learner is not NotProvided:
             self.num_gpus_per_learner = num_gpus_per_learner
+        if custom_resources_per_learner is not NotProvided:
+            self.custom_resources_per_learner = custom_resources_per_learner
         if num_aggregator_actors_per_learner is not NotProvided:
             self.num_aggregator_actors_per_learner = num_aggregator_actors_per_learner
         if max_requests_in_flight_per_aggregator_actor is not NotProvided:
diff --git a/rllib/core/learner/learner_group.py b/rllib/core/learner/learner_group.py
index 1c5613a687bdb..6400a3aad8794 100644
--- a/rllib/core/learner/learner_group.py
+++ b/rllib/core/learner/learner_group.py
@@ -145,9 +145,11 @@ def __init__(
             # TODO (sven): Activate this when Ray has figured out GPU pre-loading.
             # - (0.01 * self.config.num_aggregator_actors_per_learner),
         )
+        custom_resources_per_learner = self.config.custom_resources_per_learner
         resources_per_learner = {
             "CPU": num_cpus_per_learner,
             "GPU": num_gpus_per_learner,
+            **custom_resources_per_learner,
         }
 
         backend_executor = BackendExecutor(
diff --git a/rllib/core/learner/torch/torch_learner.py b/rllib/core/learner/torch/torch_learner.py
index 5e43315d133f4..7f38ef08a91d7 100644
--- a/rllib/core/learner/torch/torch_learner.py
+++ b/rllib/core/learner/torch/torch_learner.py
@@ -462,7 +462,11 @@ def build(self) -> None:
        after setting up all variables because `configure_optimizer_for_module` is
        called in this `Learner.build()`.
        """
-        self._device = get_device(self.config, self.config.num_gpus_per_learner)
+        self._device = get_device(
+            self.config,
+            self.config.num_gpus_per_learner,
+            self.config.custom_resources_per_learner,
+        )
 
         super().build()
 
diff --git a/rllib/env/multi_agent_env_runner.py b/rllib/env/multi_agent_env_runner.py
index 4c84a70e07b67..da1b753dc2996 100644
--- a/rllib/env/multi_agent_env_runner.py
+++ b/rllib/env/multi_agent_env_runner.py
@@ -99,6 +99,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
         self._device = get_device(
             self.config,
             0 if not self.worker_index else self.config.num_gpus_per_env_runner,
+            self.config.custom_resources_per_env_runner,
         )
 
         # Create the vectorized gymnasium env.
diff --git a/rllib/env/single_agent_env_runner.py b/rllib/env/single_agent_env_runner.py
index 07f51ffa16578..df62303da7f6a 100644
--- a/rllib/env/single_agent_env_runner.py
+++ b/rllib/env/single_agent_env_runner.py
@@ -92,6 +92,7 @@ def __init__(self, *, config: AlgorithmConfig, **kwargs):
         self._device = get_device(
             self.config,
             0 if not self.worker_index else self.config.num_gpus_per_env_runner,
+            self.config.custom_resources_per_env_runner,
         )
 
         # Create the vectorized gymnasium env.
diff --git a/rllib/utils/framework.py b/rllib/utils/framework.py
index c0b9a28fa4726..6e869454d372c 100644
--- a/rllib/utils/framework.py
+++ b/rllib/utils/framework.py
@@ -51,7 +51,11 @@ def convert_to_tensor(
 
 
 @PublicAPI
-def get_device(config: "AlgorithmConfig", num_gpus_requested: int = 1):
+def get_device(
+    config: "AlgorithmConfig",
+    num_gpus_requested: int = 1,
+    custom_resources_requested: Optional[dict] = None,
+):
     """Returns a single device (CPU or some GPU) depending on a config.
 
     Args:
@@ -59,6 +63,8 @@ def get_device(config: "AlgorithmConfig", num_gpus_requested: int = 1):
         num_gpus_requested: The number of GPUs actually requested. This may be
             the value of `config.num_gpus_per_env_runner` when for example calling
             this function from an EnvRunner.
+        custom_resources_requested: Similar to the GPU, the dictionary contains the
+            number of accelerators actually requested.
 
     Returns:
         A single device (or name) given `config` and `num_gpus_requested`.
@@ -94,6 +100,12 @@ def get_device(config: "AlgorithmConfig", num_gpus_requested: int = 1):
             # `torch.cuda.device_count() = 1` and torch.device(0) maps to that GPU
             # with ID=1 on the node.
             return torch.device(config.local_gpu_idx)
+        elif custom_resources_requested:
+            from ray.air._internal.torch_utils import get_devices
+
+            # The `get_devices()` API in ray.air should handle the custom accelerator
+            # and return torch.device("cpu") if no accelerator is available.
+            return get_devices()[0]
         else:
             return torch.device("cpu")
     else: