Skip to content

Commit

Permalink
fix recursion error when saving voronoi-tessellation to JSON
Browse files Browse the repository at this point in the history
refactor matminer featurizer imports
slurm_submit_python() returns more env vars
rename slurm_submit_python() to slurm_submit()
  • Loading branch information
janosh committed Jun 20, 2023
1 parent 5804d13 commit 38a9d7a
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 55 deletions.
9 changes: 5 additions & 4 deletions matbench_discovery/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from datetime import datetime

# Lower-case names of the SLURM variables of interest, written without the
# "SLURM_" prefix.
# NOTE(review): presumably each key is looked up as SLURM_<KEY> in os.environ
# by slurm_submit() -- confirm against the function body.
#
# The key list is assembled via implicit string-literal concatenation, so the
# first fragment MUST end with a space. Without it "submit_host" and
# "job_partition" fuse into the single bogus key "submit_hostjob_partition"
# and both real keys are silently lost.
SLURM_KEYS = (
    "job_id array_task_id array_task_count mem_per_node nodelist submit_host "
    "job_partition job_user job_account tasks_per_node job_qos"
).split()


def _get_calling_file_path(frame: int = 1) -> str:
Expand All @@ -22,7 +23,7 @@ def _get_calling_file_path(frame: int = 1) -> str:
return os.path.abspath(caller_path)


def slurm_submit_python(
def slurm_submit(
job_name: str,
log_dir: str,
time: str,
Expand All @@ -45,7 +46,7 @@ def slurm_submit_python(
ID and array task ID.
time (str): 'HH:MM:SS' time limit for the job.
py_file_path (str, optional): Path to the python script to be submitted.
Defaults to the path of the file calling slurm_submit_python().
Defaults to the path of the file calling slurm_submit().
partition (str, optional): Slurm partition.
account (str, optional): Account to charge for this job.
slurm_flags (Sequence[str], optional): Extra slurm CLI flags. Defaults to ().
Expand Down
7 changes: 3 additions & 4 deletions models/bowsr/slurm_array_bowsr_wbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from tqdm import tqdm

from matbench_discovery import ROOT, as_dict_handler
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

__author__ = "Janosh Riebesell"
__date__ = "2022-08-15"
Expand All @@ -41,7 +41,7 @@

data_path = f"{ROOT}/data/2022-10-19-wbm-init-structs.json.gz"

slurm_vars = slurm_submit_python(
slurm_vars = slurm_submit(
job_name=job_name,
log_dir=out_dir,
partition="icelake-himem",
Expand Down Expand Up @@ -97,9 +97,8 @@
megnet_version=version("megnet"),
optimize_kwargs=optimize_kwargs,
task_type=task_type,
slurm_array_task_count=slurm_array_task_count,
slurm_max_job_time=slurm_max_job_time,
**slurm_vars,
slurm_vars=slurm_vars,
)
if wandb.run is None:
wandb.login()
Expand Down
4 changes: 2 additions & 2 deletions models/cgcnn/slurm_train_cgcnn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from torch.utils.data import DataLoader
from tqdm import tqdm

from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

"""
Train a CGCNN ensemble of size n_folds on target_col of data_path.
Expand All @@ -33,7 +33,7 @@
today = timestamp.split("@")[0]
log_dir = f"{os.path.dirname(__file__)}/{today}-{run_name}"

slurm_submit_python(
slurm_submit(
job_name=run_name,
partition="ampere",
account="LEE-SL3-GPU",
Expand Down
4 changes: 2 additions & 2 deletions models/cgcnn/use_cgcnn_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from matbench_discovery import ROOT
from matbench_discovery.plot_scripts import df_wbm
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

__author__ = "Janosh Riebesell"
__date__ = "2022-08-15"
Expand All @@ -31,7 +31,7 @@
ensemble_id = "cgcnn-e_form-ensemble-1"
run_name = f"{today}-{ensemble_id}-IS2RE"

slurm_submit_python(
slurm_submit(
job_name=run_name,
partition="ampere",
account="LEE-SL3-GPU",
Expand Down
7 changes: 3 additions & 4 deletions models/m3gnet/slurm_array_m3gnet_wbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from tqdm import tqdm

from matbench_discovery import ROOT, as_dict_handler
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

"""
To slurm submit this file: python path/to/file.py slurm-submit
Expand All @@ -36,7 +36,7 @@
job_name = f"m3gnet-wbm-{task_type}-{slurm_job_id}"
out_dir = f"{module_dir}/{today}-{job_name}"

slurm_vars = slurm_submit_python(
slurm_vars = slurm_submit(
job_name=job_name,
log_dir=out_dir,
partition="icelake-himem",
Expand Down Expand Up @@ -77,11 +77,10 @@
run_params = dict(
data_path=data_path,
m3gnet_version=version("m3gnet"),
slurm_array_task_count=slurm_array_task_count,
task_type=task_type,
slurm_max_job_time=slurm_max_job_time,
df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
**slurm_vars,
slurm_vars=slurm_vars,
)
if wandb.run is None:
wandb.login()
Expand Down
56 changes: 25 additions & 31 deletions models/voronoi/featurize_mp_wbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,45 +3,34 @@
import warnings
from datetime import datetime

import matminer.featurizers.composition as feat_comp
import matminer.featurizers.structure as feat_struct
import numpy as np
import pandas as pd
import wandb
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.composition import (
ElementProperty,
IonProperty,
Stoichiometry,
ValenceOrbital,
)
from matminer.featurizers.structure import (
ChemicalOrdering,
MaximumPackingEfficiency,
SiteStatsFingerprint,
StructuralHeterogeneity,
StructureComposition,
)
from pymatgen.core import Structure
from tqdm import tqdm

from matbench_discovery import ROOT
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery import ROOT, as_dict_handler
from matbench_discovery.slurm import slurm_submit

today = f"{datetime.now():%Y-%m-%d}"
module_dir = os.path.dirname(__file__)


# data_path = f"{ROOT}/data/mp/2022-09-16-mp-computed-structure-entries.json.gz"
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
input_col = "structure"
input_col = "initial_structure"
data_name = "wbm" if "wbm" in data_path else "mp"
slurm_array_task_count = 100
slurm_array_task_count = 20
job_name = f"voronoi-featurize-{data_name}"

slurm_vars = slurm_submit_python(
slurm_vars = slurm_submit(
job_name=job_name,
partition="icelake-himem",
account="LEE-SL3-CPU",
time=(slurm_max_job_time := "3:0:0"),
time=(slurm_max_job_time := "5:0:0"),
array=f"1-{slurm_array_task_count}",
log_dir=f"{module_dir}/{job_name}",
)
Expand All @@ -51,6 +40,8 @@
df = pd.read_json(data_path).set_index("material_id")

slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
run_name = f"{job_name}-{slurm_array_task_id}"

df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[
slurm_array_task_id - 1
]
Expand All @@ -69,30 +60,33 @@
data_path=data_path,
slurm_max_job_time=slurm_max_job_time,
df=dict(shape=str(df_this_job.shape), columns=", ".join(df_this_job)),
**slurm_vars,
input_col=input_col,
slurm_vars=slurm_vars,
)
if wandb.run is None:
wandb.login()

wandb.init(
project="matbench-discovery",
name=f"{job_name}-{slurm_array_task_id}",
name=run_name,
config=run_params,
)


# %% Create the featurizer: Ward et al. use a variety of different featurizers
# https://journals.aps.org/prb/abstract/10.1103/PhysRevB.96.024104
featurizers = [
SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
StructuralHeterogeneity(),
ChemicalOrdering(),
MaximumPackingEfficiency(),
SiteStatsFingerprint.from_preset("LocalPropertyDifference_ward-prb-2017"),
StructureComposition(Stoichiometry()),
StructureComposition(ElementProperty.from_preset("magpie")),
StructureComposition(ValenceOrbital(props=["frac"])),
StructureComposition(IonProperty(fast=True)),
feat_struct.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017"),
feat_struct.StructuralHeterogeneity(),
feat_struct.ChemicalOrdering(),
feat_struct.MaximumPackingEfficiency(),
feat_struct.SiteStatsFingerprint.from_preset(
"LocalPropertyDifference_ward-prb-2017"
),
feat_struct.StructureComposition(feat_comp.Stoichiometry()),
feat_struct.StructureComposition(feat_comp.ElementProperty.from_preset("magpie")),
feat_struct.StructureComposition(feat_comp.ValenceOrbital(props=["frac"])),
feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)),
]
featurizer = MultipleFeaturizer(featurizers)

Expand All @@ -108,5 +102,5 @@

# %%
df_features.to_json(
f"{module_dir}/{today}-voronoi-tesselation-{data_name}-features.json.gz"
f"{module_dir}/{today}-{run_name}.json.gz", default_handler=as_dict_handler
)
4 changes: 2 additions & 2 deletions models/wrenformer/mp/use_wrenformer_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from aviary.wrenformer.model import Wrenformer

from matbench_discovery import ROOT
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

__author__ = "Janosh Riebesell"
__date__ = "2022-08-15"
Expand All @@ -28,7 +28,7 @@
assert "wbm" in data_path
run_name = "wrenformer-wbm-IS2RE"

slurm_submit_python(
slurm_submit(
job_name=run_name,
partition="ampere",
account="LEE-SL3-GPU",
Expand Down
4 changes: 2 additions & 2 deletions models/wrenformer/slurm_train_wrenformer_ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from aviary.train import df_train_test_split, train_wrenformer

from matbench_discovery import ROOT
from matbench_discovery.slurm import slurm_submit_python
from matbench_discovery.slurm import slurm_submit

"""
Train a Wrenformer ensemble of size n_folds on target_col of data_path.
Expand All @@ -31,7 +31,7 @@
dataset = "mp"
log_dir = f"{os.path.dirname(__file__)}/{dataset}/{today}-{run_name}"

slurm_submit_python(
slurm_submit(
job_name=run_name,
partition="ampere",
account="LEE-SL3-GPU",
Expand Down
8 changes: 4 additions & 4 deletions tests/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import pytest
from pytest import CaptureFixture

from matbench_discovery.slurm import _get_calling_file_path, slurm_submit_python
from matbench_discovery.slurm import _get_calling_file_path, slurm_submit

today = f"{datetime.now():%Y-%m-%d}"

Expand All @@ -21,7 +21,7 @@ def test_slurm_submit(capsys: CaptureFixture[str], py_file_path: str | None) ->
partition = "fake-partition"
account = "fake-account"

func_call = lambda: slurm_submit_python(
func_call = lambda: slurm_submit(
job_name=job_name,
log_dir=log_dir,
time=time,
Expand All @@ -35,10 +35,10 @@ def test_slurm_submit(capsys: CaptureFixture[str], py_file_path: str | None) ->

assert slurm_vars == {"slurm_job_id": "1234"}
stdout, stderr = capsys.readouterr()
# check slurm_submit_python() did nothing in normal mode
# check slurm_submit() did nothing in normal mode
assert stderr == stderr == ""

# check slurm_submit_python() prints cmd and calls subprocess.run() in submit mode
# check slurm_submit() prints cmd and calls subprocess.run() in submit mode
with pytest.raises(SystemExit), patch("sys.argv", ["slurm-submit"]), patch(
"matbench_discovery.slurm.subprocess.run"
) as mock_subprocess_run:
Expand Down

0 comments on commit 38a9d7a

Please sign in to comment.