Skip to content

Commit

Permalink
allow loading either nsel or natoms atomic tensor data (#3394)
Browse files Browse the repository at this point in the history
A new data-requirement parameter, `output_natoms_for_type_sel`, is added
(default: false).
If `type_sel` is given, `output_natoms_for_type_sel` is true, and the data
dimension is nsel, the data will be converted to natoms.
If `type_sel` is given, `output_natoms_for_type_sel` is false, and the data
dimension is natoms, the data will be converted to nsel.
In all other situations, the data keeps its original shape.
The user can therefore provide data in either nsel or natoms form, as long as
`type_sel` and `output_natoms_for_type_sel` are set.

---------

Signed-off-by: Jinzhe Zeng <jinzhe.zeng@rutgers.edu>
  • Loading branch information
njzjz authored Mar 3, 2024
1 parent 7af9e20 commit 9c508b7
Show file tree
Hide file tree
Showing 6 changed files with 137 additions and 4 deletions.
4 changes: 4 additions & 0 deletions deepmd/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def add_data_requirement(
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Specify data requirements for training.
Expand All @@ -103,6 +104,8 @@ def add_data_requirement(
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given, the atomic dimension will be natoms instead of nsel
"""
data_requirement[key] = {
"ndof": ndof,
Expand All @@ -113,6 +116,7 @@ def add_data_requirement(
"repeat": repeat,
"default": default,
"dtype": dtype,
"output_natoms_for_type_sel": output_natoms_for_type_sel,
}


Expand Down
1 change: 1 addition & 0 deletions deepmd/pt/utils/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,5 @@ def add_data_requirement(self, data_requirement: List[DataRequirementItem]):
repeat=data_item["repeat"],
default=data_item["default"],
dtype=data_item["dtype"],
output_natoms_for_type_sel=data_item["output_natoms_for_type_sel"],
)
60 changes: 56 additions & 4 deletions deepmd/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ def add(
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Add a data item to be loaded.
Expand All @@ -173,6 +174,8 @@ def add(
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given, the atomic dimension will be natoms instead of nsel
"""
self.data_dict[key] = {
"ndof": ndof,
Expand All @@ -184,6 +187,7 @@ def add(
"reduce": None,
"default": default,
"dtype": dtype,
"output_natoms_for_type_sel": output_natoms_for_type_sel,
}
return self

Expand Down Expand Up @@ -523,6 +527,9 @@ def _load_set(self, set_name: DPPath):
repeat=self.data_dict[kk]["repeat"],
default=self.data_dict[kk]["default"],
dtype=self.data_dict[kk]["dtype"],
output_natoms_for_type_sel=self.data_dict[kk][
"output_natoms_for_type_sel"
],
)
for kk in self.data_dict.keys():
if self.data_dict[kk]["reduce"] is not None:
Expand Down Expand Up @@ -589,19 +596,25 @@ def _load_data(
type_sel=None,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
if atomic:
natoms = self.natoms
idx_map = self.idx_map
# if type_sel, then revise natoms and idx_map
if type_sel is not None:
natoms = 0
natoms_sel = 0
for jj in type_sel:
natoms += np.sum(self.atom_type == jj)
idx_map = self._idx_map_sel(self.atom_type, type_sel)
natoms_sel += np.sum(self.atom_type == jj)
idx_map_sel = self._idx_map_sel(self.atom_type, type_sel)
else:
natoms_sel = natoms
idx_map_sel = idx_map
ndof = ndof_ * natoms
else:
ndof = ndof_
natoms_sel = 0
idx_map_sel = None
if dtype is not None:
pass
elif high_prec:
Expand All @@ -613,6 +626,38 @@ def _load_data(
data = path.load_numpy().astype(dtype)
try: # YWolfeee: deal with data shape error
if atomic:
if type_sel is not None:
# check the data shape is nsel or natoms
if data.size == nframes * natoms_sel * ndof_:
if output_natoms_for_type_sel:
tmp = np.zeros(
[nframes, natoms, ndof_], dtype=data.dtype
)
sel_mask = np.isin(self.atom_type, type_sel)
tmp[:, sel_mask] = data.reshape(
[nframes, natoms_sel, ndof_]
)
data = tmp
else:
natoms = natoms_sel
idx_map = idx_map_sel
ndof = ndof_ * natoms
elif data.size == nframes * natoms * ndof_:
if output_natoms_for_type_sel:
pass
else:
sel_mask = np.isin(self.atom_type, type_sel)
data = data[:, sel_mask]
natoms = natoms_sel
idx_map = idx_map_sel
ndof = ndof_ * natoms
else:
raise ValueError(
f"The shape of the data {key} in {set_name}"
f"is {data.shape}, which doesn't match either"
f"({nframes}, {natoms_sel}, {ndof_}) or"
f"({nframes}, {natoms}, {ndof_})"
)
data = data.reshape([nframes, natoms, -1])
data = data[:, idx_map, :]
data = data.reshape([nframes, -1])
Expand All @@ -621,13 +666,15 @@ def _load_data(
explanation = "This error may occur when your label mismatch it's name, i.e. you might store global tensor in `atomic_tensor.npy` or atomic tensor in `tensor.npy`."
log.error(str(err_message))
log.error(explanation)
raise ValueError(str(err_message) + ". " + explanation)
raise ValueError(str(err_message) + ". " + explanation) from err_message
if repeat != 1:
data = np.repeat(data, repeat).reshape([nframes, -1])
return np.float32(1.0), data
elif must:
raise RuntimeError("%s not found!" % path)
else:
if type_sel is not None and not output_natoms_for_type_sel:
ndof = ndof_ * natoms_sel
data = np.full([nframes, ndof], default, dtype=dtype)
if repeat != 1:
data = np.repeat(data, repeat).reshape([nframes, -1])
Expand Down Expand Up @@ -694,6 +741,8 @@ class DataRequirementItem:
default value of data
dtype : np.dtype, optional
the dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool, optional
if True and type_sel is given, the atomic dimension will be natoms instead of nsel
"""

def __init__(
Expand All @@ -707,6 +756,7 @@ def __init__(
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
) -> None:
self.key = key
self.ndof = ndof
Expand All @@ -717,6 +767,7 @@ def __init__(
self.repeat = repeat
self.default = default
self.dtype = dtype
self.output_natoms_for_type_sel = output_natoms_for_type_sel
self.dict = self.to_dict()

def to_dict(self) -> dict:
Expand All @@ -730,6 +781,7 @@ def to_dict(self) -> dict:
"repeat": self.repeat,
"default": self.default,
"dtype": self.dtype,
"output_natoms_for_type_sel": self.output_natoms_for_type_sel,
}

def __getitem__(self, key: str):
Expand Down
12 changes: 12 additions & 0 deletions deepmd/utils/data_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ def add_dict(self, adict: dict) -> None:
type_sel=adict[kk]["type_sel"],
repeat=adict[kk]["repeat"],
default=adict[kk]["default"],
dtype=adict[kk].get("dtype"),
output_natoms_for_type_sel=adict[kk].get(
"output_natoms_for_type_sel", False
),
)

def add(
Expand All @@ -305,6 +309,8 @@ def add(
type_sel: Optional[List[int]] = None,
repeat: int = 1,
default: float = 0.0,
dtype: Optional[np.dtype] = None,
output_natoms_for_type_sel: bool = False,
):
"""Add a data item to be loaded.
Expand All @@ -329,6 +335,10 @@ def add(
The data will be repeated `repeat` times.
default, default=0.
Default value of data
dtype
The dtype of data, overwrites `high_prec` if provided
output_natoms_for_type_sel : bool
If True and type_sel is given, the atomic dimension will be natoms instead of nsel
"""
for ii in self.data_systems:
ii.add(
Expand All @@ -340,6 +350,8 @@ def add(
repeat=repeat,
type_sel=type_sel,
default=default,
dtype=dtype,
output_natoms_for_type_sel=output_natoms_for_type_sel,
)

def reduce(self, key_out, key_in):
Expand Down
1 change: 1 addition & 0 deletions source/tests/tf/test_data_requirement.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,4 @@ def test_add(self):
self.assertEqual(data_requirement["test"]["high_prec"], False)
self.assertEqual(data_requirement["test"]["repeat"], 1)
self.assertEqual(data_requirement["test"]["default"], 0.0)
self.assertEqual(data_requirement["test"]["output_natoms_for_type_sel"], False)
63 changes: 63 additions & 0 deletions source/tests/tf/test_deepmd_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def setUp(self):
os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.bar"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.tar"), exist_ok=True)
os.makedirs(os.path.join(self.data_name, "set.foo"), exist_ok=True)
np.savetxt(os.path.join(self.data_name, "type.raw"), np.array([1, 0]), fmt="%d")
np.savetxt(
os.path.join(self.data_name, "type_map.raw"),
Expand Down Expand Up @@ -141,6 +142,16 @@ def setUp(self):
np.save(path, self.test_frame_bar)
# t n
self.test_null = np.zeros([self.nframes, 2 * self.natoms])
# tensor shape
path = os.path.join(self.data_name, "set.foo", "tensor_natoms.npy")
self.tensor_natoms = np.random.default_rng().random(
[self.nframes, self.natoms, 6]
)
self.tensor_natoms[:, 0, :] = 0
np.save(path, self.tensor_natoms)
path = os.path.join(self.data_name, "set.foo", "tensor_nsel.npy")
self.tensor_nsel = self.tensor_natoms[:, 1, :]
np.save(path, self.tensor_nsel)

def tearDown(self):
shutil.rmtree(self.data_name)
Expand Down Expand Up @@ -292,6 +303,58 @@ def test_get_nbatch(self):
nb = dd.get_numb_batch(2, 0)
self.assertEqual(nb, 2)

def test_get_tensor(self):
    """Check that the two stored tensor layouts load to the same result.

    Both ``tensor_nsel`` and ``tensor_natoms`` are requested from ``set.foo``
    with ``type_sel=[0]``, once with ``output_natoms_for_type_sel=True`` and
    once with ``output_natoms_for_type_sel=False``.  Within each loading mode
    the two keys must agree, and the first atom slot of the ``True`` output
    must equal the ``False`` output.
    """
    set_dir = os.path.join(self.data_name, "set.foo")
    tensor_keys = ("tensor_nsel", "tensor_natoms")

    def load(as_natoms):
        # Register both keys with identical settings; reassigning the result
        # of ``add`` reproduces the chained ``.add(...).add(...)`` calls.
        system = DeepmdData(self.data_name)
        for name in tensor_keys:
            system = system.add(
                name,
                6,
                atomic=True,
                must=True,
                type_sel=[0],
                output_natoms_for_type_sel=as_natoms,
            )
        return system._load_set(set_dir)

    data_natoms = load(True)
    data_nsel = load(False)

    # Within one output mode, both input files must give the same array.
    np.testing.assert_allclose(
        data_natoms["tensor_natoms"], data_natoms["tensor_nsel"]
    )
    np.testing.assert_allclose(data_nsel["tensor_natoms"], data_nsel["tensor_nsel"])

    # Across modes: atom slot 0 of the natoms output equals the nsel output.
    per_atom = data_natoms["tensor_natoms"].reshape(self.nframes, self.natoms, -1)
    np.testing.assert_allclose(per_atom[:, 0, :], data_nsel["tensor_natoms"])

def _comp_np_mat2(self, first, second):
    """Assert that ``first`` and ``second`` are almost equal.

    The comparison precision is the ``places`` value defined outside this
    method, passed to NumPy as the ``decimal`` argument.
    """
    np.testing.assert_almost_equal(first, second, decimal=places)

Expand Down

0 comments on commit 9c508b7

Please sign in to comment.