Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

breaking: pt: remove data preprocess from data stat #3261

Merged
merged 15 commits into from
Feb 13, 2024
2 changes: 1 addition & 1 deletion deepmd/pt/model/descriptor/dpa1.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ def distinguish_types(self) -> bool:
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return False
return self.se_atten.distinguish_types()

@property
def dim_out(self):
Expand Down
8 changes: 8 additions & 0 deletions deepmd/pt/model/descriptor/hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,14 @@ def get_dim_in(self) -> int:
def get_dim_emb(self):
return self.dim_emb

def distinguish_types(self) -> bool:
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return True in [
iProzd marked this conversation as resolved.
Show resolved Hide resolved
descriptor.distinguish_types() for descriptor in self.descriptor_list
]

@property
def dim_out(self):
"""Returns the output dimension of this descriptor."""
Expand Down
45 changes: 24 additions & 21 deletions deepmd/pt/model/descriptor/repformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
env,
)
from deepmd.pt.utils.nlist import (
build_neighbor_list,
process_input,
)
from deepmd.pt.utils.utils import (
get_activation_fn,
Expand Down Expand Up @@ -178,6 +178,12 @@
"""Returns the embedding dimension g2."""
return self.g2_dim

def distinguish_types(self) -> bool:
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return False

@property
def dim_out(self):
"""Returns the output dimension of this descriptor."""
Expand Down Expand Up @@ -272,44 +278,41 @@
suma2 = []
mixed_type = "real_natoms_vec" in merged[0]
for system in merged:
index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3)
extended_coord = torch.gather(system["coord"], dim=1, index=index)
extended_coord = extended_coord - system["shift"]
index = system["mapping"]
extended_atype = torch.gather(system["atype"], dim=1, index=index)
nloc = system["atype"].shape[-1]
#######################################################
# dirty hack here! the interface of dataload should be
# redesigned to support descriptors like dpa2
#######################################################
nlist = build_neighbor_list(
extended_coord,
extended_atype,
nloc,
self.rcut,
coord, atype, box, natoms = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
extended_coord, extended_atype, mapping, nlist = process_input(
coord,
atype,
self.get_rcut(),
self.get_sel(),
distinguish_types=False,
distinguish_types=self.distinguish_types(),
box=box,
)
env_mat, _, _ = prod_env_mat_se_a(
extended_coord,
nlist,
system["atype"],
atype,
self.mean,
self.stddev,
self.rcut,
self.rcut_smth,
)
if not mixed_type:
sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt(
env_mat.detach().cpu().numpy(), ndescrpt, system["natoms"]
env_mat.detach().cpu().numpy(), ndescrpt, natoms
)
else:
real_natoms_vec = system["real_natoms_vec"]

Check warning on line 309 in deepmd/pt/model/descriptor/repformers.py

View check run for this annotation

Codecov / codecov/patch

deepmd/pt/model/descriptor/repformers.py#L309

Added line #L309 was not covered by tests
sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt(
env_mat.detach().cpu().numpy(),
ndescrpt,
system["real_natoms_vec"],
real_natoms_vec,
mixed_type=mixed_type,
real_atype=system["atype"].detach().cpu().numpy(),
real_atype=atype.detach().cpu().numpy(),
)
sumr.append(sysr)
suma.append(sysa)
Expand Down
36 changes: 29 additions & 7 deletions deepmd/pt/model/descriptor/se_a.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,11 @@
from deepmd.pt.model.network.network import (
TypeFilter,
)
from deepmd.pt.utils.nlist import (
process_input,
)

log = logging.getLogger(__name__)
github-advanced-security[bot] marked this conversation as resolved.
Fixed
Show resolved Hide resolved

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -100,7 +105,7 @@
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return True
return self.sea.distinguish_types()

@property
def dim_out(self):
Expand Down Expand Up @@ -347,6 +352,12 @@
"""Returns the input dimension."""
return self.dim_in

def distinguish_types(self) -> bool:
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return True

@property
def dim_out(self):
"""Returns the output dimension of this descriptor."""
Expand Down Expand Up @@ -381,20 +392,31 @@
sumr2 = []
suma2 = []
for system in merged:
index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3)
extended_coord = torch.gather(system["coord"], dim=1, index=index)
extended_coord = extended_coord - system["shift"]
coord, atype, box, natoms = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
extended_coord, extended_atype, mapping, nlist = process_input(
coord,
atype,
self.get_rcut(),
self.get_sel(),
distinguish_types=self.distinguish_types(),
box=box,
)
env_mat, _, _ = prod_env_mat_se_a(
extended_coord,
system["nlist"],
system["atype"],
nlist,
atype,
self.mean,
self.stddev,
self.rcut,
self.rcut_smth,
)
sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt(
env_mat.detach().cpu().numpy(), self.ndescrpt, system["natoms"]
env_mat.detach().cpu().numpy(), self.ndescrpt, natoms
)
sumr.append(sysr)
suma.append(sysa)
Expand Down
37 changes: 29 additions & 8 deletions deepmd/pt/model/descriptor/se_atten.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
from deepmd.pt.utils import (
env,
)
from deepmd.pt.utils.nlist import (
process_input,
)


@DescriptorBlock.register("se_atten")
Expand Down Expand Up @@ -161,6 +164,12 @@
"""Returns the output dimension of embedding."""
return self.filter_neuron[-1]

def distinguish_types(self) -> bool:
"""Returns if the descriptor requires a neighbor list that distinguish different
atomic types or not.
"""
return False

@property
def dim_out(self):
"""Returns the output dimension of this descriptor."""
Expand All @@ -185,29 +194,41 @@
suma2 = []
mixed_type = "real_natoms_vec" in merged[0]
for system in merged:
index = system["mapping"].unsqueeze(-1).expand(-1, -1, 3)
extended_coord = torch.gather(system["coord"], dim=1, index=index)
extended_coord = extended_coord - system["shift"]
coord, atype, box, natoms = (
system["coord"],
system["atype"],
system["box"],
system["natoms"],
)
extended_coord, extended_atype, mapping, nlist = process_input(
coord,
atype,
self.get_rcut(),
self.get_sel(),
distinguish_types=self.distinguish_types(),
box=box,
)
env_mat, _, _ = prod_env_mat_se_a(
extended_coord,
system["nlist"],
system["atype"],
nlist,
atype,
self.mean,
self.stddev,
self.rcut,
self.rcut_smth,
)
if not mixed_type:
sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt(
env_mat.detach().cpu().numpy(), self.ndescrpt, system["natoms"]
env_mat.detach().cpu().numpy(), self.ndescrpt, natoms
)
else:
real_natoms_vec = system["real_natoms_vec"]

Check warning on line 225 in deepmd/pt/model/descriptor/se_atten.py

View check run for this annotation

Codecov / codecov/patch

deepmd/pt/model/descriptor/se_atten.py#L225

Added line #L225 was not covered by tests
sysr, sysr2, sysa, sysa2, sysn = analyze_descrpt(
env_mat.detach().cpu().numpy(),
self.ndescrpt,
system["real_natoms_vec"],
real_natoms_vec,
mixed_type=mixed_type,
real_atype=system["atype"].detach().cpu().numpy(),
real_atype=atype.detach().cpu().numpy(),
)
sumr.append(sysr)
suma.append(sysa)
Expand Down
26 changes: 5 additions & 21 deletions deepmd/pt/model/model/make_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,8 @@
fit_output_to_model_output,
)
from deepmd.pt.utils.nlist import (
build_neighbor_list,
extend_coord_with_ghosts,
nlist_distinguish_types,
)
from deepmd.pt.utils.region import (
normalize_coord,
process_input,
)


Expand Down Expand Up @@ -93,26 +89,14 @@ def forward_common(
The keys are defined by the `ModelOutputDef`.

"""
nframes, nloc = atype.shape[:2]
if box is not None:
coord_normalized = normalize_coord(
coord.view(nframes, nloc, 3),
box.reshape(nframes, 3, 3),
)
else:
coord_normalized = coord.clone()
extended_coord, extended_atype, mapping = extend_coord_with_ghosts(
coord_normalized, atype, box, self.get_rcut()
)
nlist = build_neighbor_list(
extended_coord,
extended_atype,
nloc,
extended_coord, extended_atype, mapping, nlist = process_input(
coord,
atype,
self.get_rcut(),
self.get_sel(),
distinguish_types=self.distinguish_types(),
box=box,
)
extended_coord = extended_coord.view(nframes, -1, 3)
model_predict_lower = self.forward_common_lower(
extended_coord,
extended_atype,
Expand Down
21 changes: 1 addition & 20 deletions deepmd/pt/utils/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,26 +267,7 @@ def collate_batch(batch):
example = batch[0]
result = example.copy()
for key in example.keys():
if key == "shift" or key == "mapping":
natoms_extended = max([d[key].shape[0] for d in batch])
n_frames = len(batch)
list = []
for x in range(n_frames):
list.append(batch[x][key])
if key == "shift":
result[key] = torch.zeros(
(n_frames, natoms_extended, 3),
dtype=env.GLOBAL_PT_FLOAT_PRECISION,
)
else:
result[key] = torch.zeros(
(n_frames, natoms_extended),
dtype=torch.long,
)
for i in range(len(batch)):
natoms_tmp = list[i].shape[0]
result[key][i, :natoms_tmp] = list[i]
elif "find_" in key:
if "find_" in key:
result[key] = batch[0][key]
else:
if batch[0][key] is None:
Expand Down
Loading