Commit 44914ea

Merge pull request #850 from linsword13/fix-slurm-wm
Fix slurm batch_submit self-reference
linsword13 authored Jan 28, 2025
2 parents 9f4e662 + 8d8de1a commit 44914ea
Showing 2 changed files with 41 additions and 29 deletions.
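
For context, the gist of the fix: the Slurm workflow manager renders a batch_submit wrapper script, and the fallback command placed inside that wrapper was built from the `batch_submit` variable itself, so the wrapper ended up calling `sbatch` on its own path instead of on the rendered Slurm job script (`slurm_experiment_sbatch`). The sketch below is a minimal illustration of that expansion, assuming hypothetical paths and a simplified variable dictionary; it is not Ramble's actual API.

# Minimal sketch of the self-reference addressed by this commit (illustrative only).
run_dir = "/workspace/experiments/hostname/local/test_default"  # hypothetical path

# Rendered templates can be referenced as variables that expand to their own paths
# (simplified here as a plain dict).
variables = {
    "batch_submit": f"{run_dir}/batch_submit",
    "slurm_experiment_sbatch": f"{run_dir}/slurm_experiment_sbatch",
}

def batch_submit_cmd(script_var: str) -> str:
    """Build the command written into the rendered batch_submit wrapper."""
    return f"sbatch {variables[script_var]}"

# Before the fix: the fallback used `batch_submit`, so the wrapper submitted itself.
print(batch_submit_cmd("batch_submit"))
# -> sbatch /workspace/experiments/hostname/local/test_default/batch_submit

# After the fix: the wrapper submits the rendered Slurm job script instead.
print(batch_submit_cmd("slurm_experiment_sbatch"))
# -> sbatch /workspace/experiments/hostname/local/test_default/slurm_experiment_sbatch

The new test added in the first diff below checks exactly this: the rendered batch_submit must reference slurm_experiment_sbatch and must not reference batch_submit or execute_experiment.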
@@ -21,6 +21,46 @@
 )
 
 
+def test_slurm_workflow_default():
+    workspace_name = "test_slurm_workflow_default"
+
+    test_config = """
+ramble:
+  variants:
+    workflow_manager: slurm
+  variables:
+    processes_per_node: 1
+    n_nodes: 1
+  applications:
+    hostname:
+      workloads:
+        local:
+          experiments:
+            test_default: {}
+"""
+    with ramble.workspace.create(workspace_name) as ws:
+        ws.write()
+        config_path = os.path.join(ws.config_dir, ramble.workspace.config_file_name)
+        with open(config_path, "w+") as f:
+            f.write(test_config)
+        ws._re_read()
+        workspace("setup", "--dry-run", global_args=["-D", ws.root])
+
+        path = os.path.join(ws.experiment_dir, "hostname", "local", "test_default")
+        files = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
+        assert "batch_submit" in files
+        assert "batch_query" in files
+        assert "batch_cancel" in files
+        assert "batch_wait" in files
+        with open(os.path.join(path, "batch_submit")) as f:
+            content = f.read()
+            assert "slurm_experiment_sbatch" in content
+            assert "execute_experiment" not in content
+            assert ".slurm_job" in content
+            assert "sbatch" in content
+            assert "batch_submit" not in content
+
+
 def test_slurm_workflow():
     workspace_name = "test_slurm_workflow"
 
@@ -108,7 +108,7 @@ def _batch_submit_vars(self):
                     "`sbatch` is missing in the given `batch_submit` command"
                 )
         else:
-            batch_submit_script = vars["batch_submit"]
+            batch_submit_script = vars["slurm_experiment_sbatch"]
             batch_submit_cmd = f"sbatch {batch_submit_script}"
         return {
             "batch_submit_cmd": batch_submit_cmd,
@@ -151,7 +151,6 @@ def template_render_vars(self):
             ("#SBATCH --gpus-per-node {gpus_per_node}"),
         ]
         partition = expander.expand_var_name("slurm_partition")
-        self._check_partition(partition)
         if partition:
             pragmas.append("#SBATCH -p {slurm_partition}")
         extra_headers = (
@@ -165,33 +164,6 @@
             "workflow_hostfile_cmd": self.runner.get_hostfile_cmd(),
         }
 
-    def _check_partition(self, partition):
-        """Warns about potential issues of the slurm_partition config
-
-        Only gives out warning as the user may be relying on a custom
-        execute template that contains the relevant partition info.
-        """
-        try:
-            partition_prop = self.runner.get_partitions()
-        except RunnerError:
-            return
-        if partition_prop is None:
-            return
-
-        partitions = partition_prop["partitions"]
-        if partition not in partitions:
-            default_partition = partition_prop["default_partition"]
-            if default_partition is not None and not partition:
-                logger.info(
-                    "`slurm_partition` is not given, "
-                    f"using default partition {default_partition}"
-                )
-            else:
-                logger.warn(
-                    "Missing valid `slurm_partition` setting. "
-                    f"It should be one of {partitions}"
-                )
-
     def get_status(self, workspace):
         expander = self.app_inst.expander
         run_dir = expander.expand_var_name("experiment_run_dir")
