feat(pt/tf/dp): support econf type embedding #3781

Merged · 10 commits · May 16, 2024
14 changes: 14 additions & 0 deletions deepmd/dpmodel/descriptor/dpa1.py
@@ -193,6 +193,12 @@ class DescrptDPA1(NativeOP, BaseDescriptor):
Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, optional
Whether to use electronic configuration type embedding.
type_map: List[str], optional
A list of strings, giving the name of each atom type.
Only used when `use_econf_tebd` is `True` in the type embedding net.

spin
(Only support None to keep consistent with other backend references.)
(Not used in this version. Not-none option is not implemented.)
@@ -242,6 +248,8 @@ def __init__(
concat_output_tebd: bool = True,
spin: Optional[Any] = None,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
# consistent with argcheck, not used though
seed: Optional[int] = None,
) -> None:
@@ -287,12 +295,16 @@ def __init__(
trainable_ln=trainable_ln,
ln_eps=ln_eps,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
neuron=[tebd_dim],
padding=True,
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.tebd_dim = tebd_dim
self.concat_output_tebd = concat_output_tebd
@@ -457,6 +469,8 @@ def serialize(self) -> dict:
"smooth_type_embedding": obj.smooth,
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
# make deterministic
"precision": np.dtype(PRECISION_DICT[obj.precision]).name,
"embeddings": obj.embeddings.serialize(),
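A minimal usage sketch of the new keywords on the NumPy-backend descriptor. The leading rcut/rcut_smth/sel/ntypes arguments are assumed from the usual DPA-1 signature; only `use_econf_tebd` and `type_map` come from this diff, and the values here are hypothetical:

from deepmd.dpmodel.descriptor.dpa1 import DescrptDPA1

# Hypothetical two-species setup; rcut/rcut_smth/sel are placeholders,
# not values taken from this PR.
descriptor = DescrptDPA1(
    rcut=6.0,
    rcut_smth=0.5,
    sel=[100],
    ntypes=2,
    use_econf_tebd=True,   # switch the type embedding input to econf vectors
    type_map=["O", "H"],   # element names, required when use_econf_tebd=True
)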
13 changes: 13 additions & 0 deletions deepmd/dpmodel/descriptor/dpa2.py
@@ -323,6 +323,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.

@@ -350,6 +352,11 @@
(Unused yet) Random seed for parameter initialization.
add_tebd_to_repinit_out : bool, optional
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, optional
Whether to use electronic configuration type embedding.
type_map : List[str], optional
A list of strings, giving the name of each atom type.
Only used when `use_econf_tebd` is `True` in the type embedding net.

Returns
-------
@@ -433,12 +440,16 @@ def init_subclass_params(sub_data, sub_class):
trainable_ln=self.repformer_args.trainable_ln,
ln_eps=self.repformer_args.ln_eps,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes=ntypes,
neuron=[self.repinit_args.tebd_dim],
padding=True,
activation_function="Linear",
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
self.precision = precision
@@ -641,6 +652,8 @@ def serialize(self) -> dict:
"env_protection": self.env_protection,
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"type_embedding": self.type_embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
}
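As the serialize hunk shows, the two new keys travel with the descriptor dict. A round-trip sketch, assuming the usual serialize/deserialize pair on dpmodel descriptors and reusing the hypothetical DPA-1 descriptor from the previous example (DescrptDPA2 takes the same two keywords but a more involved constructor):

data = descriptor.serialize()
# The new fields ride along with the rest of the descriptor state.
assert data["use_econf_tebd"] is True
assert data["type_map"] == ["O", "H"]

# Rebuilding from the dict restores the econf-based type embedding.
restored = DescrptDPA1.deserialize(data)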
45 changes: 41 additions & 4 deletions deepmd/dpmodel/utils/type_embed.py
@@ -39,6 +39,11 @@ class TypeEmbedNet(NativeOP):
Random seed for initializing the network parameters.
padding
Concat the zero padding to the output, as the default embedding of empty type.
use_econf_tebd: bool, optional
Whether to use electronic configuration type embedding.
type_map: List[str], optional
A list of strings, giving the name of each atom type.
Only used when `use_econf_tebd` is `True` in the type embedding net.
"""

def __init__(
@@ -52,6 +57,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
padding: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
) -> None:
self.ntypes = ntypes
self.neuron = neuron
@@ -61,8 +68,33 @@ def __init__(
self.activation_function = str(activation_function)
self.trainable = trainable
self.padding = padding
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
embed_input_dim = ntypes
if self.use_econf_tebd:
from deepmd.utils.econf_embd import (
ECONF_DIM,
electronic_configuration_embedding,
)
from deepmd.utils.econf_embd import type_map as periodic_table

assert (
self.type_map is not None
), "When using electronic configuration type embedding, type_map must be provided!"

missing_types = [t for t in self.type_map if t not in periodic_table]
assert not missing_types, (
"When using electronic configuration type embedding, "
"all element in type_map should be in periodic table! "
f"Found these invalid elements: {missing_types}"
)
self.econf_tebd = np.array(
[electronic_configuration_embedding[kk] for kk in self.type_map],
dtype=PRECISION_DICT[self.precision],
)
embed_input_dim = ECONF_DIM
self.embedding_net = EmbeddingNet(
ntypes,
embed_input_dim,
self.neuron,
self.activation_function,
self.resnet_dt,
@@ -71,9 +103,12 @@ def __init__(

def call(self) -> np.ndarray:
"""Compute the type embedding network."""
embed = self.embedding_net(
np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision])
)
if not self.use_econf_tebd:
embed = self.embedding_net(
np.eye(self.ntypes, dtype=PRECISION_DICT[self.precision])
)
else:
embed = self.embedding_net(self.econf_tebd)
if self.padding:
embed = np.pad(embed, ((0, 1), (0, 0)), mode="constant")
return embed
@@ -120,5 +155,7 @@ def serialize(self) -> dict:
"activation_function": self.activation_function,
"trainable": self.trainable,
"padding": self.padding,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"embedding": self.embedding_net.serialize(),
}
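Conceptually, the econf branch swaps the embedding net's input: instead of a one-hot identity of shape (ntypes, ntypes), it stacks the fixed electronic-configuration vector of each element in type_map into an (ntypes, ECONF_DIM) matrix. A standalone sketch using only the imports shown above (the float64 dtype is a stand-in for PRECISION_DICT[precision]):

import numpy as np

from deepmd.utils.econf_embd import (
    ECONF_DIM,
    electronic_configuration_embedding,
)

type_map = ["O", "H"]
# One fixed econf vector per element, stacked row-wise.
econf_tebd = np.array(
    [electronic_configuration_embedding[kk] for kk in type_map],
    dtype=np.float64,
)
assert econf_tebd.shape == (len(type_map), ECONF_DIM)
# embed = embedding_net(econf_tebd)  # each row becomes that type's embedding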
19 changes: 18 additions & 1 deletion deepmd/pt/model/descriptor/dpa1.py
@@ -172,6 +172,11 @@ class DescrptDPA1(BaseDescriptor, torch.nn.Module):
Setting this parameter to `True` is equivalent to setting `tebd_input_mode` to 'strip'.
Setting it to `False` is equivalent to setting `tebd_input_mode` to 'concat'.
The default value is `None`, which means the `tebd_input_mode` setting will be used instead.
use_econf_tebd: bool, optional
Whether to use electronic configuration type embedding.
type_map: List[str], optional
A list of strings, giving the name of each atom type.
Only used when `use_econf_tebd` is `True` in the type embedding net.
spin
(Only support None to keep consistent with other backend references.)
(Not used in this version. Not-none option is not implemented.)
@@ -220,6 +225,8 @@ def __init__(
smooth_type_embedding: bool = True,
type_one_side: bool = False,
stripped_type_embedding: Optional[bool] = None,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
# not implemented
spin=None,
type: Optional[str] = None,
@@ -270,7 +277,15 @@ def __init__(
ln_eps=ln_eps,
old_impl=old_impl,
)
self.type_embedding = TypeEmbedNet(ntypes, tebd_dim, precision=precision)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes,
tebd_dim,
precision=precision,
use_econf_tebd=use_econf_tebd,
type_map=type_map,
)
self.tebd_dim = tebd_dim
self.concat_output_tebd = concat_output_tebd
self.trainable = trainable
@@ -415,6 +430,8 @@ def serialize(self) -> dict:
"smooth_type_embedding": obj.smooth,
"type_one_side": obj.type_one_side,
"concat_output_tebd": self.concat_output_tebd,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
# make deterministic
"precision": RESERVED_PRECISON_DICT[obj.prec],
"embeddings": obj.filter_layers.serialize(),
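The PyTorch side threads the same two keywords through its own TypeEmbedNet, whose call shape (ntypes and tebd_dim positional) is taken from the hunk above. A sketch with an assumed import path and hypothetical values:

# Import path is an assumption, not confirmed by this diff.
from deepmd.pt.model.network.network import TypeEmbedNet

type_embedding = TypeEmbedNet(
    2,                     # ntypes
    8,                     # tebd_dim
    precision="float64",
    use_econf_tebd=True,
    type_map=["O", "H"],
)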
17 changes: 16 additions & 1 deletion deepmd/pt/model/descriptor/dpa2.py
@@ -76,6 +76,8 @@ def __init__(
trainable: bool = True,
seed: Optional[int] = None,
add_tebd_to_repinit_out: bool = False,
use_econf_tebd: bool = False,
type_map: Optional[List[str]] = None,
old_impl: bool = False,
):
r"""The DPA-2 descriptor. see https://arxiv.org/abs/2312.15492.
@@ -104,6 +106,11 @@
(Unused yet) Random seed for parameter initialization.
add_tebd_to_repinit_out : bool, optional
Whether to add type embedding to the output representation from repinit before inputting it into repformer.
use_econf_tebd : bool, optional
Whether to use electronic configuration type embedding.
type_map : List[str], optional
A list of strings, giving the name of each atom type.
Only used when `use_econf_tebd` is `True` in the type embedding net.

Returns
-------
@@ -189,8 +196,14 @@ def init_subclass_params(sub_data, sub_class):
ln_eps=self.repformer_args.ln_eps,
old_impl=old_impl,
)
self.use_econf_tebd = use_econf_tebd
self.type_map = type_map
self.type_embedding = TypeEmbedNet(
ntypes, self.repinit_args.tebd_dim, precision=precision
ntypes,
self.repinit_args.tebd_dim,
precision=precision,
use_econf_tebd=self.use_econf_tebd,
type_map=type_map,
)
self.concat_output_tebd = concat_output_tebd
self.precision = precision
@@ -368,6 +381,8 @@ def serialize(self) -> dict:
"env_protection": self.env_protection,
"trainable": self.trainable,
"add_tebd_to_repinit_out": self.add_tebd_to_repinit_out,
"use_econf_tebd": self.use_econf_tebd,
"type_map": self.type_map,
"type_embedding": self.type_embedding.embedding.serialize(),
"g1_shape_tranform": self.g1_shape_tranform.serialize(),
}
4 changes: 4 additions & 0 deletions deepmd/pt/model/model/__init__.py
@@ -107,6 +107,8 @@
ntypes = len(model_params["type_map"])
# descriptor
model_params["descriptor"]["ntypes"] = ntypes
if model_params["descriptor"].get("use_econf_tebd", False):
model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])

descriptor = BaseDescriptor(**model_params["descriptor"])
# fitting
fitting_net = model_params.get("fitting_net", None)
@@ -152,6 +154,8 @@
ntypes = len(model_params["type_map"])
# descriptor
model_params["descriptor"]["ntypes"] = ntypes
if model_params["descriptor"].get("use_econf_tebd", False):
model_params["descriptor"]["type_map"] = copy.deepcopy(model_params["type_map"])

descriptor = BaseDescriptor(**model_params["descriptor"])
# fitting
fitting_net = model_params.get("fitting_net", None)
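A hypothetical input snippet illustrating the propagation above: the user only sets use_econf_tebd in the descriptor section, and the model builder copies the top-level type_map into the descriptor parameters before constructing it:

model_params = {
    "type_map": ["O", "H"],
    "descriptor": {
        "type": "se_atten",        # hypothetical descriptor choice
        "use_econf_tebd": True,
        # "type_map" is injected here automatically, deep-copied from
        # the model-level "type_map" above.
    },
    "fitting_net": {"neuron": [240, 240, 240]},  # placeholder
}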