Skip to content

Commit

Permalink
fix models/voronoi/featurize_mp_wbm.py featurizer.set_n_jobs(1)
Browse files Browse the repository at this point in the history
wandb.log features as wandb.Table
  • Loading branch information
janosh committed Jun 20, 2023
1 parent 8acba18 commit 8508c38
Show file tree
Hide file tree
Showing 6 changed files with 25 additions and 19 deletions.
4 changes: 2 additions & 2 deletions models/bowsr/test_bowsr.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,9 @@

# %%
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
out_path = f"{out_dir}/{slurm_array_task_id}.json.gz"
out_path = f"{out_dir}/bowsr-preds-{slurm_array_task_id}.json.gz"

print(f"Job started running {timestamp}")
print(f"\nJob started running {timestamp}")
print(f"{data_path = }")
print(f"{out_path = }")
print(f"{version('maml') = }")
Expand Down
2 changes: 1 addition & 1 deletion models/cgcnn/train_cgcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@


# %%
print(f"Job started running {timestamp}")
print(f"\nJob started running {timestamp}")

train_model(
checkpoint="wandb", # None | 'local' | 'wandb',
Expand Down
4 changes: 2 additions & 2 deletions models/m3gnet/test_m3gnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,10 @@
# %%
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))

print(f"Job started running {timestamp}")
print(f"\nJob started running {timestamp}")
print(f"{version('m3gnet') = }")

out_path = f"{out_dir}/{slurm_array_task_id}.json.gz"
out_path = f"{out_dir}/m3gnet-preds-{slurm_array_task_id}.json.gz"

if os.path.isfile(out_path):
    raise SystemExit(f"{out_path = } already exists, exiting early")
Expand Down
5 changes: 2 additions & 3 deletions models/megnet/test_megnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@


# %%
print(f"Job started running {timestamp}")
print(f"\nJob started running {timestamp}")

out_path = f"{out_dir}/megnet-e-form-preds.csv"
if os.path.isfile(out_path):
Expand All @@ -56,7 +56,6 @@
print(f"Loading from {data_path=}")
df_wbm_structs = pd.read_json(data_path).set_index("material_id")


megnet_mp_e_form = load_model(model_name := "Eform_MP_2019")


Expand Down Expand Up @@ -109,7 +108,7 @@
out_col = "e_form_per_atom_megnet"
df_wbm[out_col] = pd.Series(megnet_e_form_preds)

df_wbm[out_col].reset_index().to_csv(out_path)
df_wbm[out_col].reset_index().to_csv(out_path, index=False)


# %%
Expand Down
27 changes: 17 additions & 10 deletions models/voronoi/featurize_mp_wbm.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,25 +23,29 @@
data_path = f"{ROOT}/data/wbm/2022-10-19-wbm-init-structs.json.bz2"
input_col = "initial_structure"
data_name = "wbm" if "wbm" in data_path else "mp"
slurm_array_task_count = 20
job_name = f"voronoi-featurize-{data_name}"
slurm_array_task_count = 10
job_name = f"voronoi-features-{data_name}"
log_dir = f"{module_dir}/{today}-{job_name}"

slurm_vars = slurm_submit(
job_name=job_name,
partition="icelake-himem",
account="LEE-SL3-CPU",
time=(slurm_max_job_time := "5:0:0"),
array=f"1-{slurm_array_task_count}",
log_dir=f"{module_dir}/{job_name}",
log_dir=log_dir,
)


# %%
df = pd.read_json(data_path).set_index("material_id")

slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
run_name = f"{job_name}-{slurm_array_task_id}"
out_path = f"{log_dir}/{run_name}.csv.bz2"

if os.path.isfile(out_path):
    raise SystemExit(f"{out_path = } already exists, exiting early")

df = pd.read_json(data_path).set_index("material_id")
df_this_job: pd.DataFrame = np.array_split(df, slurm_array_task_count)[
slurm_array_task_id - 1
]
Expand Down Expand Up @@ -90,18 +94,21 @@
feat_struct.StructureComposition(feat_comp.IonProperty(fast=True)),
]
featurizer = MultipleFeaturizer(featurizers)
# multiprocessing seems to be the cause of OOM errors on large structures even when
# taking only small slice of the data and launching slurm jobs with --mem 100G
featurizer.set_n_jobs(1)


# %% prints lots of pymatgen warnings
# > No electronegativity for Ne. Setting to NaN. This has no physical meaning, ...
warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen")

df_features = featurizer.featurize_dataframe(
df_this_job, input_col, ignore_errors=True, pbar=True
)
df_this_job, input_col, ignore_errors=True, pbar=dict(position=0, leave=True)
).drop(columns=input_col)


# %%
df_features.to_json(
f"{module_dir}/{today}-{run_name}.json.gz", default_handler=as_dict_handler
)
df_features.to_csv(out_path)

wandb.log({"voronoi_features": wandb.Table(dataframe=df_features)})
2 changes: 1 addition & 1 deletion models/wrenformer/train_wrenformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@
slurm_array_task_id = int(os.environ.get("SLURM_ARRAY_TASK_ID", 0))
input_col = "wyckoff_spglib"

print(f"Job started running {timestamp}")
print(f"\nJob started running {timestamp}")
print(f"{run_name=}")
print(f"{data_path=}")

Expand Down

0 comments on commit 8508c38

Please sign in to comment.