-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add models/chgnet/{test_chgnet,join_m3gnet_results}.py
convert remaining hard-coded data filepaths to use DATA_FILES wrap ML relaxations in try/except to skip failing structures not crashing jobs reduces missing preds rate
- Loading branch information
Showing
12 changed files
with
405 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
"""Concatenate chgnet results from multiple data files generated by slurm job array | ||
into single file. | ||
""" | ||
|
||
|
||
# %% | ||
from __future__ import annotations | ||
|
||
import os | ||
import warnings | ||
from glob import glob | ||
|
||
import pandas as pd | ||
from megnet.utils.models import load_model | ||
from pymatgen.core import Structure | ||
from pymatgen.entries.compatibility import MaterialsProject2020Compatibility | ||
from pymatgen.entries.computed_entries import ComputedStructureEntry | ||
from pymatviz import density_scatter | ||
from tqdm import tqdm | ||
|
||
from matbench_discovery import today | ||
from matbench_discovery.data import DATA_FILES, as_dict_handler | ||
from matbench_discovery.energy import get_e_form_per_atom | ||
from matbench_discovery.preds import df_wbm as df_summary | ||
from matbench_discovery.preds import e_form_col | ||
|
||
__author__ = "Janosh Riebesell" | ||
__date__ = "2023-03-01" | ||
|
||
warnings.filterwarnings(action="ignore", category=UserWarning, module="pymatgen") | ||
|
||
|
||
# %% | ||
# Directory containing this script; result files are searched for beneath it.
module_dir = os.path.dirname(__file__)
task_type = "IS2RE"
date = "2023-03-02"
# matches gzipped JSON files one directory level below module_dir
glob_pattern = f"{date}-chgnet-wbm-{task_type}*/*.json.gz"
file_paths = glob(f"{module_dir}/{glob_pattern}")
file_paths.sort()  # deterministic processing order
print(f"Found {len(file_paths):,} files for {glob_pattern = }")

# loaded data frames keyed by the file path they were read from
dfs: dict[str, pd.DataFrame] = {}
|
||
|
||
# %% | ||
# Read every result file once; paths already present in dfs are left alone so
# this cell can be re-run without reloading everything.
for file_path in tqdm(file_paths):
    if file_path not in dfs:
        df = pd.read_json(file_path).set_index("material_id")
        # drop trajectory to save memory
        dfs[file_path] = df.drop(columns="chgnet_trajectory")


# %%
# stack the per-file frames into a single frame, rounded to 4 decimals
df_chgnet = pd.concat(dfs.values()).round(decimals=4)
|
||
|
||
# %% | ||
# Load the WBM computed structure entries, indexed by material ID.
df_wbm = pd.read_json(DATA_FILES.wbm_computed_structure_entries)
df_wbm = df_wbm.set_index("material_id")

# turn each serialized entry dict into a ComputedStructureEntry object
df_wbm["cse"] = list(
    map(ComputedStructureEntry.from_dict, tqdm(df_wbm.computed_structure_entry))
)
|
||
|
||
# %% transfer chgnet energies and relaxed structures to WBM CSEs
cse: ComputedStructureEntry
# Collect the updated entries first and assign the column once afterwards,
# instead of a scalar .loc assignment (which also creates the column) per row.
chgnet_cses: dict[str, ComputedStructureEntry] = {}
for row in tqdm(df_chgnet.itertuples(), total=len(df_chgnet)):
    # positional unpacking: the index value followed by the first two columns
    mat_id, struct_dict, chgnet_energy, *_ = row
    chgnet_struct = Structure.from_dict(struct_dict)
    # NOTE: the entry is mutated in place, so df_wbm.cse holds the very same
    # object with the CHGNet energy and structure after this loop.
    cse = df_wbm.loc[mat_id, "cse"]
    cse._energy = chgnet_energy  # cse._energy is the uncorrected energy
    cse._structure = chgnet_struct
    chgnet_cses[mat_id] = cse

# index-aligned assignment; dtype=object keeps the entries as Python objects
df_chgnet["cse"] = pd.Series(chgnet_cses, dtype=object)
|
||
|
||
# %% | ||
# formation energies computed before the compatibility processing below is run
df_chgnet["e_form_per_atom_chgnet_uncorrected"] = list(
    map(get_e_form_per_atom, tqdm(df_chgnet.cse))
)


# %% apply energy corrections
out = MaterialsProject2020Compatibility().process_entries(
    entries=df_chgnet.cse, clean=True, verbose=True
)
# as many entries must come back as were passed in
assert len(out) == len(df_chgnet)
|
||
|
||
# %% compute corrected formation energies | ||
df_chgnet["e_form_per_atom_chgnet"] = list(
    map(get_e_form_per_atom, tqdm(df_chgnet.cse))
)

# copy the e_form_col column from the summary frame, aligned on the index
df_chgnet[e_form_col] = df_summary[e_form_col]
|
||
|
||
# %% | ||
# corrected vs. uncorrected CHGNet formation energies
ax = density_scatter(
    x="e_form_per_atom_chgnet", y="e_form_per_atom_chgnet_uncorrected", df=df_chgnet
)
# corrected CHGNet formation energies vs. the e_form_col column
ax = density_scatter(x="e_form_per_atom_chgnet", y=e_form_col, df=df_chgnet)
|
||
|
||
# %% load 2019 MEGNet formation energy model | ||
megnet_mp_e_form = load_model("Eform_MP_2019")
# predictions keyed by material ID; kept across re-runs of the cell below
megnet_e_form_preds: dict[str, float] = {}


# %% predict formation energies on chgnet relaxed structure with MEGNet
# Iterate df_chgnet.cse rather than df_wbm.cse: only material IDs present in
# df_chgnet survive the index-aligned assignment below, so predicting for any
# other WBM entry would be wasted work on a structure CHGNet never relaxed.
for material_id, cse in tqdm(df_chgnet.cse.items(), total=len(df_chgnet)):
    if material_id in megnet_e_form_preds:
        continue  # already predicted in an earlier run of this cell
    try:
        struct = cse.structure
        [e_form_per_atom] = megnet_mp_e_form.predict_structure(struct)
        megnet_e_form_preds[material_id] = e_form_per_atom
    except Exception as exc:
        # best effort: report the failure and move on to the next structure
        print(f"Failed to predict {material_id=}: {exc}")

df_chgnet["e_form_per_atom_chgnet_megnet"] = pd.Series(megnet_e_form_preds)

# material IDs without a prediction show up as NaN; only a handful are tolerated
assert (
    n_isna := df_chgnet.e_form_per_atom_chgnet_megnet.isna().sum()
) < 10, f"{n_isna=}, expected 7 or similar"
|
||
|
||
# %% | ||
# CHGNet formation energies vs. MEGNet predictions stored in df_chgnet
ax = density_scatter(
    x="e_form_per_atom_chgnet", y="e_form_per_atom_chgnet_megnet", df=df_chgnet
)
|
||
|
||
# %% | ||
out_path = f"{module_dir}/{today}-chgnet-wbm-{task_type}.json.gz"
df_chgnet = df_chgnet.round(4)
# JSON output keeps every column; as_dict_handler is the fallback serializer
df_chgnet.reset_index().to_json(
    path_or_buf=out_path, default_handler=as_dict_handler
)

# CSV written next to the JSON file holds the numeric columns only
df_chgnet.select_dtypes(include="number").to_csv(
    path_or_buf=out_path.replace(".json.gz", ".csv")
)
|
||
# in_path = f"{module_dir}/2022-10-31-chgnet-wbm-IS2RE.json.gz" | ||
# df_chgnet_csv = pd.read_csv(in_path.replace(".json.gz", ".csv")) | ||
# df_chgnet = pd.read_json(in_path).set_index("material_id") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
model_name: CHGNet | ||
model_version: 0.0.1 | ||
matbench_discovery_version: 1.0 | ||
date_added: "2023-03-03" | ||
date_published: "2023-03-01" | ||
authors: | ||
- name: Bowen Deng | ||
affiliation: UC Berkeley | ||
- name: Peichen Zhong | ||
affiliation: UC Berkeley | ||
orcid: https://orcid.org/0000-0003-1921-1628 | ||
email: zhongpc@berkeley.edu | ||
- name: KyuJung Jun | ||
affiliation: UC Berkeley | ||
orcid: https://orcid.org/0000-0003-1974-028X | ||
- name: Kevin Han | ||
affiliation: UC Berkeley | ||
orcid: https://orcid.org/0000-0002-4028-2108 | ||
- name: Christopher J. Bartel | ||
affiliation: University of Minnesota | ||
orcid: https://orcid.org/0000-0002-5198-5036 | ||
- name: Gerbrand Ceder | ||
affiliation: UC Berkeley | ||
orcid: https://orcid.org/0000-0001-9275-3605 | ||
email: gceder@berkeley.edu | ||
repo: https://github.com/CederGroupHub/chgnet | ||
doi: https://doi.org/10.48550/arXiv.2302.14231 | ||
preprint: https://arxiv.org/abs/2302.14231 | ||
requirements: | ||
torch: 1.11.0 | ||
ase: 3.22.0 | ||
pymatgen: 2022.10.22 | ||
numpy: 1.24.0 | ||
pandas: 1.5.1 | ||
trained_on_benchmark: false | ||
|
||
notes: | ||
description: The Crystal Hamiltonian Graph Neural Network (CHGNet) is a universal GNN-based interatomic potential trained on energies, forces, stresses and magnetic moments from the MP trajectory dataset containing ∼1.5 million inorganic structures. | ||
training: Using pre-trained model released with preprint. Training set unreleased until after review. |
Oops, something went wrong.