Skip to content

Commit

Permalink
update eda_mp_trj.py asserts to match fixed 2024-09-03-mp-trj.extxyz.zip (now with same number of frames as original = 1_580_395)
Browse files Browse the repository at this point in the history

assert len(mp_trj_atoms) == 145_923
assert len(df_mp_trj) == 1_580_395
  • Loading branch information
janosh committed Sep 5, 2024
1 parent 0d54d68 commit 9058aa7
Showing 1 changed file with 5 additions and 7 deletions.
12 changes: 5 additions & 7 deletions data/mp/eda_mp_trj.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,17 @@
# %% extract extXYZ files from zipped directory without unpacking the whole archive
# takes ~8 mins on M2 Max
# takes ~5 mins on M3 Max
-atoms_list = ase_atoms_from_zip(
-zip_path, file_check=lambda name: name.startswith("mptrj-gga-ggapu/mp-")
-)
+atoms_list = ase_atoms_from_zip(zip_path)

mp_trj_atoms: dict[str, list[ase.Atoms]] = defaultdict(list)
for atoms in atoms_list:
-mp_id = atoms.info.get("mp_id", "no-id")
-assert mp_id.startswith("mp-")
+mp_id = atoms.info.get(Key.mat_id, "no-id")
+assert mp_id.startswith(("mp-", "mvc-"))
mp_trj_atoms[mp_id].append(atoms)

del atoms_list # free up memory

-assert len(mp_trj_atoms) == 145_919 # number of unique MP IDs
+assert len(mp_trj_atoms) == 145_923 # number of unique MP IDs


# %%
Expand All @@ -102,7 +100,7 @@ def info_dict_to_id(info: dict[str, int | str]) -> str:
}
).T.convert_dtypes() # convert object columns to float/int where possible
df_mp_trj.index.name = "frame_id"
-assert len(df_mp_trj) == 1_580_312 # number of total frames
+assert len(df_mp_trj) == 1_580_395 # number of total frames
if Key.formula not in df_mp_trj:
raise KeyError(f"{Key.formula!s} not in {df_mp_trj.columns=}")

Expand Down

0 comments on commit 9058aa7

Please sign in to comment.