diff --git a/support/Machines/CaltechHpc.yaml b/support/Machines/CaltechHpc.yaml index 5b4949d1d1a9..59c5f27fc9fa 100644 --- a/support/Machines/CaltechHpc.yaml +++ b/support/Machines/CaltechHpc.yaml @@ -9,7 +9,7 @@ Machine: https://www.hpc.caltech.edu/documentation # Split one physical 56 core node into two charm nodes DefaultTasksPerNode: 2 - DefaultProcsPerTasks: 28 + DefaultProcsPerTask: 28 DefaultQueue: "expansion" DefaultTimeLimit: "1-00:00:00" LaunchCommandSingleNode: ["mpirun", "-n", "1"] diff --git a/support/Machines/Mbot.yaml b/support/Machines/Mbot.yaml index 0492dffe0298..b8d7119c2132 100644 --- a/support/Machines/Mbot.yaml +++ b/support/Machines/Mbot.yaml @@ -8,7 +8,7 @@ Machine: More information: https://github.com/sxs-collaboration/WelcomeToSXS/wiki/Mbot DefaultTasksPerNode: 6 - DefaultProcsPerTasks: 32 + DefaultProcsPerTask: 32 DefaultQueue: "normal" DefaultTimeLimit: "1-00:00:00" LaunchCommandSingleNode: ["mpirun", "-n", "1"] diff --git a/support/Machines/Ocean.yaml b/support/Machines/Ocean.yaml index 2a01d3182a99..440eb2d11aa0 100644 --- a/support/Machines/Ocean.yaml +++ b/support/Machines/Ocean.yaml @@ -6,7 +6,7 @@ Machine: Description: | Supercomputer at Cal State Fullerton hosted by Geoffrey Lovelace. DefaultTasksPerNode: 1 - DefaultProcsPerTasks: 20 + DefaultProcsPerTask: 20 DefaultQueue: "orca-1" DefaultTimeLimit: "1-00:00:00" LaunchCommandSingleNode: [] diff --git a/support/Machines/Oscar.yaml b/support/Machines/Oscar.yaml index b01907a734e8..85982b84ab7a 100644 --- a/support/Machines/Oscar.yaml +++ b/support/Machines/Oscar.yaml @@ -9,7 +9,7 @@ Machine: https://docs.ccv.brown.edu/oscar # Split one physical 32 core node into two charm nodes DefaultTasksPerNode: 2 - DefaultProcsPerTasks: 16 + DefaultProcsPerTask: 16 DefaultQueue: "batch" DefaultTimeLimit: "1-00:00:00" LaunchCommandSingleNode: ["mpirun", "-n", "1"] diff --git a/support/Machines/Perlmutter.yaml b/support/Machines/Perlmutter.yaml index 24238a9f21fa..f80c9504fa3b 100644 --- a/support/Machines/Perlmutter.yaml +++ b/support/Machines/Perlmutter.yaml @@ -8,6 +8,6 @@ Machine: More information: https://docs.nersc.gov/systems/perlmutter/ DefaultTasksPerNode: 8 - DefaultProcsPerTasks: 32 + DefaultProcsPerTask: 32 DefaultQueue: "regular" DefaultTimeLimit: "1-00:00:00" diff --git a/support/Machines/Sonic.yaml b/support/Machines/Sonic.yaml index 04a8f02c0a95..30db553f7811 100644 --- a/support/Machines/Sonic.yaml +++ b/support/Machines/Sonic.yaml @@ -8,7 +8,7 @@ Machine: More information: https://it.icts.res.in/docs/sonic-cluster/ DefaultTasksPerNode: 3 - DefaultProcsPerTasks: 32 + DefaultProcsPerTask: 32 DefaultQueue: "long" DefaultTimeLimit: "1-00:00:00" LaunchCommandSingleNode: ["mpirun", "-n", "1"] diff --git a/support/Python/Machines.py b/support/Python/Machines.py index eb211f8833d6..abe5a9135950 100644 --- a/support/Python/Machines.py +++ b/support/Python/Machines.py @@ -41,10 +41,12 @@ class Machine(yaml.YAMLObject): any information that may help people get started using the machine. Provide links to wiki pages, signup pages, etc., for additional information. - DefaultProcsPerNode: Default number of worker threads spawned per node. + DefaultTasksPerNode: Default number of tasks per node (MPI ranks). + Often chosen to be the number of sockets on a node. + DefaultProcsPerTask: Default number of worker threads spawned per task. It is often advised to leave one core per node or socket free for communication, so this might be the number of cores or hyperthreads - per node minus one. + per node or socket minus one. DefaultQueue: Default queue that jobs are submitted to. On Slurm systems you can see the available queues with `sinfo`. DefaultTimeLimit: Default wall time limit for submitted jobs. For @@ -62,7 +64,8 @@ class Machine(yaml.YAMLObject): # The YAML machine files can have these attributes: Name: str Description: str - DefaultProcsPerNode: int + DefaultTasksPerNode: int + DefaultProcsPerTask: int DefaultQueue: str DefaultTimeLimit: str LaunchCommandSingleNode: List[str] diff --git a/tests/support/Python/Test_Machines.py b/tests/support/Python/Test_Machines.py index 6d9df4105f74..a5efc0649f80 100644 --- a/tests/support/Python/Test_Machines.py +++ b/tests/support/Python/Test_Machines.py @@ -21,7 +21,8 @@ def setUp(self): Machine=dict( Name="TestMachine", Description="Just for testing", - DefaultProcsPerNode=15, + DefaultTasksPerNode=2, + DefaultProcsPerTask=15, DefaultQueue="production", DefaultTimeLimit="1-00:00:00", LaunchCommandSingleNode=["mpirun", "-n", "1"], @@ -40,7 +41,8 @@ def test_this_machine(self): self.assertIsInstance(machine, Machine) self.assertEqual(machine.Name, "TestMachine") self.assertEqual(machine.Description, "Just for testing") - self.assertEqual(machine.DefaultProcsPerNode, 15) + self.assertEqual(machine.DefaultTasksPerNode, 2) + self.assertEqual(machine.DefaultProcsPerTask, 15) self.assertEqual(machine.DefaultQueue, "production") self.assertEqual(machine.DefaultTimeLimit, "1-00:00:00") self.assertEqual(machine.LaunchCommandSingleNode, ["mpirun", "-n", "1"])