diff --git a/deepmd/common.py b/deepmd/common.py index d7e485788b..29d32111a8 100644 --- a/deepmd/common.py +++ b/deepmd/common.py @@ -78,6 +78,7 @@ def add_data_requirement( repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, ): """Specify data requirements for training. @@ -103,6 +104,8 @@ def add_data_requirement( default value of data dtype : np.dtype, optional the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel """ data_requirement[key] = { "ndof": ndof, @@ -113,6 +116,7 @@ def add_data_requirement( "repeat": repeat, "default": default, "dtype": dtype, + "output_natoms_for_type_sel": output_natoms_for_type_sel, } diff --git a/deepmd/pt/utils/dataset.py b/deepmd/pt/utils/dataset.py index 67005b5ed3..77297d980c 100644 --- a/deepmd/pt/utils/dataset.py +++ b/deepmd/pt/utils/dataset.py @@ -61,4 +61,5 @@ def add_data_requirement(self, data_requirement: List[DataRequirementItem]): repeat=data_item["repeat"], default=data_item["default"], dtype=data_item["dtype"], + output_natoms_for_type_sel=data_item["output_natoms_for_type_sel"], ) diff --git a/deepmd/utils/data.py b/deepmd/utils/data.py index 03e39e1f21..194c6b1e24 100644 --- a/deepmd/utils/data.py +++ b/deepmd/utils/data.py @@ -147,6 +147,7 @@ def add( repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, ): """Add a data item that to be loaded. @@ -173,6 +174,8 @@ def add( default value of data dtype : np.dtype, optional the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel """ self.data_dict[key] = { "ndof": ndof, @@ -184,6 +187,7 @@ def add( "reduce": None, "default": default, "dtype": dtype, + "output_natoms_for_type_sel": output_natoms_for_type_sel, } return self @@ -523,6 +527,9 @@ def _load_set(self, set_name: DPPath): repeat=self.data_dict[kk]["repeat"], default=self.data_dict[kk]["default"], dtype=self.data_dict[kk]["dtype"], + output_natoms_for_type_sel=self.data_dict[kk][ + "output_natoms_for_type_sel" + ], ) for kk in self.data_dict.keys(): if self.data_dict[kk]["reduce"] is not None: @@ -589,19 +596,25 @@ def _load_data( type_sel=None, default: float = 0.0, dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, ): if atomic: natoms = self.natoms idx_map = self.idx_map # if type_sel, then revise natoms and idx_map if type_sel is not None: - natoms = 0 + natoms_sel = 0 for jj in type_sel: - natoms += np.sum(self.atom_type == jj) - idx_map = self._idx_map_sel(self.atom_type, type_sel) + natoms_sel += np.sum(self.atom_type == jj) + idx_map_sel = self._idx_map_sel(self.atom_type, type_sel) + else: + natoms_sel = natoms + idx_map_sel = idx_map ndof = ndof_ * natoms else: ndof = ndof_ + natoms_sel = 0 + idx_map_sel = None if dtype is not None: pass elif high_prec: @@ -613,6 +626,38 @@ def _load_data( data = path.load_numpy().astype(dtype) try: # YWolfeee: deal with data shape error if atomic: + if type_sel is not None: + # check the data shape is nsel or natoms + if data.size == nframes * natoms_sel * ndof_: + if output_natoms_for_type_sel: + tmp = np.zeros( + [nframes, natoms, ndof_], dtype=data.dtype + ) + sel_mask = np.isin(self.atom_type, type_sel) + tmp[:, sel_mask] = data.reshape( + [nframes, natoms_sel, ndof_] + ) + data = tmp + else: + natoms = natoms_sel + idx_map = idx_map_sel + ndof = ndof_ * natoms + elif data.size == nframes * natoms * ndof_: + if output_natoms_for_type_sel: + pass + else: + sel_mask = np.isin(self.atom_type, type_sel) + data = data[:, sel_mask] + natoms = natoms_sel + idx_map = idx_map_sel + ndof = ndof_ * natoms + else: + raise ValueError( + f"The shape of the data {key} in {set_name}" + f"is {data.shape}, which doesn't match either" + f"({nframes}, {natoms_sel}, {ndof_}) or" + f"({nframes}, {natoms}, {ndof_})" + ) data = data.reshape([nframes, natoms, -1]) data = data[:, idx_map, :] data = data.reshape([nframes, -1]) @@ -621,13 +666,15 @@ def _load_data( explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`." log.error(str(err_message)) log.error(explanation) - raise ValueError(str(err_message) + ". " + explanation) + raise ValueError(str(err_message) + ". " + explanation) from err_message if repeat != 1: data = np.repeat(data, repeat).reshape([nframes, -1]) return np.float32(1.0), data elif must: raise RuntimeError("%s not found!" % path) else: + if type_sel is not None and not output_natoms_for_type_sel: + ndof = ndof_ * natoms_sel data = np.full([nframes, ndof], default, dtype=dtype) if repeat != 1: data = np.repeat(data, repeat).reshape([nframes, -1]) @@ -694,6 +741,8 @@ class DataRequirementItem: default value of data dtype : np.dtype, optional the dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool, optional + if True and type_sel is True, the atomic dimension will be natoms instead of nsel """ def __init__( @@ -707,6 +756,7 @@ def __init__( repeat: int = 1, default: float = 0.0, dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, ) -> None: self.key = key self.ndof = ndof @@ -717,6 +767,7 @@ def __init__( self.repeat = repeat self.default = default self.dtype = dtype + self.output_natoms_for_type_sel = output_natoms_for_type_sel self.dict = self.to_dict() def to_dict(self) -> dict: @@ -730,6 +781,7 @@ def to_dict(self) -> dict: "repeat": self.repeat, "default": self.default, "dtype": self.dtype, + "output_natoms_for_type_sel": self.output_natoms_for_type_sel, } def __getitem__(self, key: str): diff --git a/deepmd/utils/data_system.py b/deepmd/utils/data_system.py index da1dd04026..0c74abfed1 100644 --- a/deepmd/utils/data_system.py +++ b/deepmd/utils/data_system.py @@ -293,6 +293,10 @@ def add_dict(self, adict: dict) -> None: type_sel=adict[kk]["type_sel"], repeat=adict[kk]["repeat"], default=adict[kk]["default"], + dtype=adict[kk].get("dtype"), + output_natoms_for_type_sel=adict[kk].get( + "output_natoms_for_type_sel", False + ), ) def add( @@ -305,6 +309,8 @@ def add( type_sel: Optional[List[int]] = None, repeat: int = 1, default: float = 0.0, + dtype: Optional[np.dtype] = None, + output_natoms_for_type_sel: bool = False, ): """Add a data item that to be loaded. @@ -329,6 +335,10 @@ def add( The data will be repeated `repeat` times. default, default=0. Default value of data + dtype + The dtype of data, overwrites `high_prec` if provided + output_natoms_for_type_sel : bool + If True and type_sel is True, the atomic dimension will be natoms instead of nsel """ for ii in self.data_systems: ii.add( @@ -340,6 +350,8 @@ def add( repeat=repeat, type_sel=type_sel, default=default, + dtype=dtype, + output_natoms_for_type_sel=output_natoms_for_type_sel, ) def reduce(self, key_out, key_in): diff --git a/source/tests/tf/test_data_requirement.py b/source/tests/tf/test_data_requirement.py index cabea15de1..e825bc3f92 100644 --- a/source/tests/tf/test_data_requirement.py +++ b/source/tests/tf/test_data_requirement.py @@ -16,3 +16,4 @@ def test_add(self): self.assertEqual(data_requirement["test"]["high_prec"], False) self.assertEqual(data_requirement["test"]["repeat"], 1) self.assertEqual(data_requirement["test"]["default"], 0.0) + self.assertEqual(data_requirement["test"]["output_natoms_for_type_sel"], False) diff --git a/source/tests/tf/test_deepmd_data.py b/source/tests/tf/test_deepmd_data.py index 3998e0f3e3..94e1f4c571 100644 --- a/source/tests/tf/test_deepmd_data.py +++ b/source/tests/tf/test_deepmd_data.py @@ -83,6 +83,7 @@ def setUp(self): os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True) os.makedirs(os.path.join(self.data_name, "set.bar"), exist_ok=True) os.makedirs(os.path.join(self.data_name, "set.tar"), exist_ok=True) + os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True) np.savetxt(os.path.join(self.data_name, "type.raw"), np.array([1, 0]), fmt="%d") np.savetxt( os.path.join(self.data_name, "type_map.raw"), @@ -141,6 +142,16 @@ def setUp(self): np.save(path, self.test_frame_bar) # t n self.test_null = np.zeros([self.nframes, 2 * self.natoms]) + # tensor shape + path = os.path.join(self.data_name, "set.foo", "tensor_natoms.npy") + self.tensor_natoms = np.random.default_rng().random( + [self.nframes, self.natoms, 6] + ) + self.tensor_natoms[:, 0, :] = 0 + np.save(path, self.tensor_natoms) + path = os.path.join(self.data_name, "set.foo", "tensor_nsel.npy") + self.tensor_nsel = self.tensor_natoms[:, 1, :] + np.save(path, self.tensor_nsel) def tearDown(self): shutil.rmtree(self.data_name) @@ -292,6 +303,58 @@ def test_get_nbatch(self): nb = dd.get_numb_batch(2, 0) self.assertEqual(nb, 2) + def test_get_tensor(self): + dd_natoms = ( + DeepmdData(self.data_name) + .add( + "tensor_nsel", + 6, + atomic=True, + must=True, + type_sel=[0], + output_natoms_for_type_sel=True, + ) + .add( + "tensor_natoms", + 6, + atomic=True, + must=True, + type_sel=[0], + output_natoms_for_type_sel=True, + ) + ) + data_natoms = dd_natoms._load_set(os.path.join(self.data_name, "set.foo")) + dd_nsel = ( + DeepmdData(self.data_name) + .add( + "tensor_nsel", + 6, + atomic=True, + must=True, + type_sel=[0], + output_natoms_for_type_sel=False, + ) + .add( + "tensor_natoms", + 6, + atomic=True, + must=True, + type_sel=[0], + output_natoms_for_type_sel=False, + ) + ) + data_nsel = dd_nsel._load_set(os.path.join(self.data_name, "set.foo")) + np.testing.assert_allclose( + data_natoms["tensor_natoms"], data_natoms["tensor_nsel"] + ) + np.testing.assert_allclose(data_nsel["tensor_natoms"], data_nsel["tensor_nsel"]) + np.testing.assert_allclose( + data_natoms["tensor_natoms"].reshape(self.nframes, self.natoms, -1)[ + :, 0, : + ], + data_nsel["tensor_natoms"], + ) + def _comp_np_mat2(self, first, second): np.testing.assert_almost_equal(first, second, places)