[RLlib] Add NPU and HPU support to RLlib #49535

Open · wants to merge 6 commits into master
11 changes: 11 additions & 0 deletions rllib/algorithms/algorithm_config.py
@@ -357,6 +357,7 @@ def __init__(self, algo_class: Optional[type] = None):
self.num_learners = 0
self.num_gpus_per_learner = 0
self.num_cpus_per_learner = "auto"
self.custom_resources_per_learner = {}
self.num_aggregator_actors_per_learner = 0
self.max_requests_in_flight_per_aggregator_actor = 3
self.local_gpu_idx = 0
@@ -2138,6 +2139,9 @@ def learners(
num_learners: Optional[int] = NotProvided,
num_cpus_per_learner: Optional[Union[str, float, int]] = NotProvided,
num_gpus_per_learner: Optional[Union[float, int]] = NotProvided,
custom_resources_per_learner: Optional[
Dict[str, Union[float, int]]
] = NotProvided,
num_aggregator_actors_per_learner: Optional[int] = NotProvided,
max_requests_in_flight_per_aggregator_actor: Optional[float] = NotProvided,
local_gpu_idx: Optional[int] = NotProvided,
@@ -2164,6 +2168,11 @@ def learners(
`num_learners=0`, any value greater than 0 runs the
training on a single GPU on the main process, while a value of 0 runs
the training on main process CPUs.
custom_resources_per_learner: A dict specifying custom resources to allocate
per Learner worker. Analogous to the GPU setting: if you request a positive
amount of an accelerator such as an NPU or HPU (already supported by Ray
Train), for example {"NPU": 1}, training runs on that accelerator.
num_aggregator_actors_per_learner: The number of aggregator actors per
Learner (if num_learners=0, one local learner is created). Must be at
least 1. Aggregator actors perform the task of a) converting episodes
@@ -2196,6 +2205,8 @@ def learners(
self.num_cpus_per_learner = num_cpus_per_learner
if num_gpus_per_learner is not NotProvided:
self.num_gpus_per_learner = num_gpus_per_learner
if custom_resources_per_learner is not NotProvided:
self.custom_resources_per_learner = custom_resources_per_learner
if num_aggregator_actors_per_learner is not NotProvided:
self.num_aggregator_actors_per_learner = num_aggregator_actors_per_learner
if max_requests_in_flight_per_aggregator_actor is not NotProvided:
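For context, a minimal usage sketch of the new option (the PPO setup, environment name, and resource amounts below are illustrative assumptions; only `custom_resources_per_learner` itself comes from this PR):

```python
from ray.rllib.algorithms.ppo import PPOConfig

# Illustrative setup only: request one NPU per Learner instead of a GPU.
# Requires a Ray cluster that actually exposes an "NPU" custom resource and
# a torch build with the matching accelerator backend installed.
config = (
    PPOConfig()
    .environment("CartPole-v1")
    .learners(
        num_learners=1,
        num_gpus_per_learner=0,
        custom_resources_per_learner={"NPU": 1},
    )
)
# algo = config.build()  # each Learner actor would then be scheduled with {"NPU": 1}
```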
2 changes: 2 additions & 0 deletions rllib/core/learner/learner_group.py
@@ -145,9 +145,11 @@ def __init__(
# TODO (sven): Activate this when Ray has figured out GPU pre-loading.
# - (0.01 * self.config.num_aggregator_actors_per_learner),
)
custom_resources_per_learner = self.config.custom_resources_per_learner
resources_per_learner = {
"CPU": num_cpus_per_learner,
"GPU": num_gpus_per_learner,
**custom_resources_per_learner,
}

backend_executor = BackendExecutor(
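To make the dict-unpacking above concrete, a tiny standalone sketch with hypothetical values:

```python
# Hypothetical per-Learner request: 1 CPU, no GPU, one NPU.
num_cpus_per_learner = 1
num_gpus_per_learner = 0
custom_resources_per_learner = {"NPU": 1}

# Mirrors the merge in LearnerGroup.__init__ above.
resources_per_learner = {
    "CPU": num_cpus_per_learner,
    "GPU": num_gpus_per_learner,
    **custom_resources_per_learner,
}
assert resources_per_learner == {"CPU": 1, "GPU": 0, "NPU": 1}
# This dict is then passed to the BackendExecutor that schedules each Learner actor.
```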
6 changes: 5 additions & 1 deletion rllib/core/learner/torch/torch_learner.py
@@ -462,7 +462,11 @@ def build(self) -> None:
after setting up all variables because `configure_optimizer_for_module` is
called in this `Learner.build()`.
"""
self._device = get_device(self.config, self.config.num_gpus_per_learner)
self._device = get_device(
self.config,
self.config.num_gpus_per_learner,
self.config.custom_resources_per_learner,
)

super().build()

1 change: 1 addition & 0 deletions rllib/env/multi_agent_env_runner.py
@@ -99,6 +99,7 @@ def __init__(self, config: AlgorithmConfig, **kwargs):
self._device = get_device(
self.config,
0 if not self.worker_index else self.config.num_gpus_per_env_runner,
self.config.custom_resources_per_env_runner,
)

# Create the vectorized gymnasium env.
1 change: 1 addition & 0 deletions rllib/env/single_agent_env_runner.py
@@ -92,6 +92,7 @@ def __init__(self, *, config: AlgorithmConfig, **kwargs):
self._device = get_device(
self.config,
0 if not self.worker_index else self.config.num_gpus_per_env_runner,
self.config.custom_resources_per_env_runner,
)

# Create the vectorized gymnasium env.
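The EnvRunner changes pick up the existing `custom_resources_per_env_runner` setting; a hedged sketch of how it could be combined with this PR (env name and amounts are assumptions, and it presumes `env_runners()` accepts this argument):

```python
from ray.rllib.algorithms.ppo import PPOConfig

# Illustrative only: give each EnvRunner one HPU so its `get_device()` call
# resolves to that accelerator (assumes the cluster exposes an "HPU" resource).
config = (
    PPOConfig()
    .environment("CartPole-v1")
    .env_runners(
        num_env_runners=2,
        custom_resources_per_env_runner={"HPU": 1},
    )
)
```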
14 changes: 13 additions & 1 deletion rllib/utils/framework.py
@@ -51,14 +51,20 @@ def convert_to_tensor(


@PublicAPI
def get_device(config: "AlgorithmConfig", num_gpus_requested: int = 1):
def get_device(
config: "AlgorithmConfig",
num_gpus_requested: int = 1,
custom_resources_requested: Optional[dict] = None,
):
"""Returns a single device (CPU or some GPU) depending on a config.

Args:
config: An AlgorithmConfig to extract information from about the device to use.
num_gpus_requested: The number of GPUs actually requested. This may be the value
of `config.num_gpus_per_env_runner` when for example calling this function
from an EnvRunner.
custom_resources_requested: Analogous to `num_gpus_requested`: a dict mapping
custom accelerator resources (for example NPU or HPU) to the amounts
actually requested.

Returns:
A single device (or name) given `config` and `num_gpus_requested`.
@@ -94,6 +100,12 @@ def get_device(config: "AlgorithmConfig", num_gpus_requested: int = 1):
# `torch.cuda.device_count() = 1` and torch.device(0) maps to that GPU
# with ID=1 on the node.
return torch.device(config.local_gpu_idx)
elif custom_resources_requested:
from ray.air._internal.torch_utils import get_devices

# The `get_devices()` API in ray.air should handle custom accelerators and
# return torch.device("cpu") if no accelerator is available.
return get_devices()[0]
else:
return torch.device("cpu")
else:
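For readers skimming the diff, a condensed standalone sketch of the resulting selection order in `get_device()` (simplified, not the exact implementation; the helper name is made up):

```python
import torch


def _pick_device(num_gpus_requested, custom_resources_requested=None, local_gpu_idx=0):
    """Simplified sketch of the device-selection order after this PR."""
    if num_gpus_requested > 0:
        # Ray narrows CUDA_VISIBLE_DEVICES for the worker, so the visible GPU
        # is addressed via the local index.
        return torch.device(local_gpu_idx)
    elif custom_resources_requested:
        # Defer to Ray AIR, which resolves NPU/HPU devices and falls back to
        # torch.device("cpu") if no accelerator is available.
        from ray.air._internal.torch_utils import get_devices

        return get_devices()[0]
    return torch.device("cpu")


# Example: no GPU requested, but one NPU -> Ray AIR picks the device.
# device = _pick_device(0, {"NPU": 1})
```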