diff --git a/nemo/collections/llm/recipes/run/executor.py b/nemo/collections/llm/recipes/run/executor.py
index fe14a4f55bd2..466cce4d95b7 100644
--- a/nemo/collections/llm/recipes/run/executor.py
+++ b/nemo/collections/llm/recipes/run/executor.py
@@ -11,16 +11,35 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from typing import Optional
+
 import nemo_run as run
+import torch
 
 
 @run.cli.factory
-def torchrun(devices: int = 8) -> run.Config[run.LocalExecutor]:
-    """Local executor using torchrun."""
+def torchrun(devices: Optional[int] = None) -> run.Config[run.LocalExecutor]:
+    """
+    Local executor using torchrun.
+
+    Args:
+        devices (Optional[int]): Number of devices to use. If None, all available CUDA devices are used.
+
+    Returns:
+        run.Config[run.LocalExecutor]: Configuration for the local executor using torchrun.
+    """
     env_vars = {
         "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",
     }
 
+    if devices is None:
+        if torch.cuda.is_available():
+            devices = torch.cuda.device_count()
+        else:
+            raise RuntimeError(
+                "Cannot infer the 'ntasks_per_node' parameter as CUDA is not available: please specify it explicitly."
+            )
+
     executor = run.Config(
         run.LocalExecutor,
         ntasks_per_node=devices,
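
For context, a minimal usage sketch of the new default behaviour (assumptions: the decorated factory remains directly callable, as it was with the old `devices: int = 8` signature, and a CUDA device is visible; the import path is taken from the file above):

    from nemo.collections.llm.recipes.run.executor import torchrun

    # New default: ntasks_per_node is inferred from torch.cuda.device_count().
    local = torchrun()

    # The count can still be pinned explicitly, e.g. for a 4-GPU node.
    local_4gpu = torchrun(devices=4)

On a machine without CUDA, `torchrun()` now raises the RuntimeError shown in the diff instead of silently defaulting to 8 tasks.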