From 9058aa745afd29f2eb103a8dbc8ee7e875dc07a3 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Thu, 5 Sep 2024 18:41:51 -0400 Subject: [PATCH] update eda_mp_trj.py asserts to match fixed 2024-09-03-mp-trj.extxyz.zip (now with same number of frames as original = 1_580_395) assert len(mp_trj_atoms) == 145_923 assert len(df_mp_trj) == 1_580_395 --- data/mp/eda_mp_trj.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/data/mp/eda_mp_trj.py b/data/mp/eda_mp_trj.py index 3f0332aa..4cdce5a9 100644 --- a/data/mp/eda_mp_trj.py +++ b/data/mp/eda_mp_trj.py @@ -64,19 +64,17 @@ # %% extract extXYZ files from zipped directory without unpacking the whole archive # takes ~8 mins on M2 Max # takes ~5 mins on M3 Max -atoms_list = ase_atoms_from_zip( - zip_path, file_check=lambda name: name.startswith("mptrj-gga-ggapu/mp-") -) +atoms_list = ase_atoms_from_zip(zip_path) mp_trj_atoms: dict[str, list[ase.Atoms]] = defaultdict(list) for atoms in atoms_list: - mp_id = atoms.info.get("mp_id", "no-id") - assert mp_id.startswith("mp-") + mp_id = atoms.info.get(Key.mat_id, "no-id") + assert mp_id.startswith(("mp-", "mvc-")) mp_trj_atoms[mp_id].append(atoms) del atoms_list # free up memory -assert len(mp_trj_atoms) == 145_919 # number of unique MP IDs +assert len(mp_trj_atoms) == 145_923 # number of unique MP IDs # %% @@ -102,7 +100,7 @@ def info_dict_to_id(info: dict[str, int | str]) -> str: } ).T.convert_dtypes() # convert object columns to float/int where possible df_mp_trj.index.name = "frame_id" -assert len(df_mp_trj) == 1_580_312 # number of total frames +assert len(df_mp_trj) == 1_580_395 # number of total frames if Key.formula not in df_mp_trj: raise KeyError(f"{Key.formula!s} not in {df_mp_trj.columns=}")