diff --git a/doc/api/smartsim_api.rst b/doc/api/smartsim_api.rst index 2af7295df..045c06b1b 100644 --- a/doc/api/smartsim_api.rst +++ b/doc/api/smartsim_api.rst @@ -111,6 +111,7 @@ steps to a batch. .. autosummary:: SrunSettings.set_nodes + SrunSettings.set_node_feature SrunSettings.set_tasks SrunSettings.set_tasks_per_node SrunSettings.set_walltime diff --git a/doc/changelog.rst b/doc/changelog.rst index 18c8f7bb8..9c78bf67c 100644 --- a/doc/changelog.rst +++ b/doc/changelog.rst @@ -18,7 +18,8 @@ To be released at some future point in time Description -- Colo Orchestrator setup now blocks application start until setup finished. +- Add method to specify node features for a Slurm job +- Colo Orchestrator setup now blocks application start until setup finished - ExecArgs handling correction - ReadTheDocs config file added and enabled on PRs - Enforce changelog updates @@ -31,6 +32,9 @@ Description Detailed Notes +- Users can now specify node features for a Slurm job through + ``SrunSettings.set_node_feature``. The method accepts a string + or list of strings. (SmartSim-PR529_) - The request to the colocated entrypoints file within the shell script is now a blocking process. Once the Orchestrator is setup, it returns which moves the process to the background and allows the application to @@ -61,6 +65,7 @@ Detailed Notes Slurm and Open MPI. (SmartSim-PR520_) +.. _SmartSim-PR529: https://github.com/CrayLabs/SmartSim/pull/529 .. _SmartSim-PR522: https://github.com/CrayLabs/SmartSim/pull/522 .. _SmartSim-PR524: https://github.com/CrayLabs/SmartSim/pull/524 .. 
_SmartSim-PR520: https://github.com/CrayLabs/SmartSim/pull/520
diff --git a/smartsim/settings/base.py b/smartsim/settings/base.py
index d7760decc..4e5b5cf4e 100644
--- a/smartsim/settings/base.py
+++ b/smartsim/settings/base.py
@@ -325,6 +325,19 @@ def set_time(self, hours: int = 0, minutes: int = 0, seconds: int = 0) -> None:
             self._fmt_walltime(int(hours), int(minutes), int(seconds))
         )
 
+    def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None:
+        """Specify the node feature for this job
+
+        :param feature_list: node feature to launch on
+        :type feature_list: str | list[str]
+        """
+        logger.warning(
+            (
+                "Feature specification not implemented for this "
+                f"RunSettings type: {type(self)}"
+            )
+        )
+
     @staticmethod
     def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str:
         """Convert hours, minutes, and seconds into valid walltime format
diff --git a/smartsim/settings/slurmSettings.py b/smartsim/settings/slurmSettings.py
index 935a8df39..61a3e9841 100644
--- a/smartsim/settings/slurmSettings.py
+++ b/smartsim/settings/slurmSettings.py
@@ -243,6 +243,29 @@ def set_broadcast(self, dest_path: t.Optional[str] = None) -> None:
         """
         self.run_args["bcast"] = dest_path
 
+    def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None:
+        """Specify the node feature for this job
+
+        This sets ``-C``
+
+        Surrounding whitespace is stripped from every feature name.
+
+        :param feature_list: node feature to launch on
+        :type feature_list: str | list[str]
+        :raises TypeError: if not str or list of str
+        """
+        type_error_msg = "node_feature argument must be string or list of strings"
+        if isinstance(feature_list, str):
+            feature_list = [feature_list]
+        try:
+            features = list(feature_list)
+        except TypeError:
+            # non-iterable input (e.g. an int): report the documented error
+            raise TypeError(type_error_msg) from None
+        if not all(isinstance(feature, str) for feature in features):
+            raise TypeError(type_error_msg)
+        self.run_args["C"] = ",".join(feature.strip() for feature in features)
+
     @staticmethod
     def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str:
         """Convert hours, minutes, and seconds into valid walltime format
diff --git a/tests/test_run_settings.py b/tests/test_run_settings.py
index 25566248d..056dad64b
100644
--- a/tests/test_run_settings.py
+++ b/tests/test_run_settings.py
@@ -339,6 +339,7 @@ def test_set_format_args(set_str, val, key):
         pytest.param("set_task_map", (3,), id="set_task_map"),
         pytest.param("set_cpus_per_task", (4,), id="set_cpus_per_task"),
         pytest.param("set_hostlist", ("hostlist",), id="set_hostlist"),
+        pytest.param("set_node_feature", ("P100",), id="set_node_feature"),
         pytest.param(
             "set_hostlist_from_file", ("~/hostfile",), id="set_hostlist_from_file"
         ),
diff --git a/tests/test_slurm_settings.py b/tests/test_slurm_settings.py
index aa5b2be11..51d4022dc 100644
--- a/tests/test_slurm_settings.py
+++ b/tests/test_slurm_settings.py
@@ -338,6 +338,26 @@ def test_set_hostlist():
         rs.set_hostlist([5])
 
 
+def test_set_node_feature():
+    """Node features are joined into a single ``-C`` run argument"""
+    rs = SrunSettings("python")
+    rs.set_node_feature(["P100", "V100"])
+    assert rs.run_args["C"] == "P100,V100"
+
+    rs.set_node_feature("P100")
+    assert rs.run_args["C"] == "P100"
+
+    # a lone string is stripped of surrounding whitespace
+    rs.set_node_feature(" P100 ")
+    assert rs.run_args["C"] == "P100"
+
+    with pytest.raises(TypeError):
+        rs.set_node_feature(5)
+
+    with pytest.raises(TypeError):
+        rs.set_node_feature(["P100", 5])
+
+
 def test_set_hostlist_from_file():
     rs = SrunSettings("python")
     rs.set_hostlist_from_file("./path/to/hostfile")