Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add init-reaction docs and args #758

Merged
merged 5 commits into from
Jul 5, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ DPGEN's documentation

init/init-bulk-mdata
init/init-surf-mdata
init/init-reaction
init/init-reaction-jdata
init/init-reaction-mdata

.. _autotest::
Expand Down
6 changes: 6 additions & 0 deletions doc/init/init-reaction-jdata.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
dpgen init_reaction parameters
======================================

.. dargs::
:module: dpgen.data.arginfo
:func: init_reaction_jdata_arginfo
21 changes: 21 additions & 0 deletions doc/init/init-reaction.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# init_reaction

`dpgen init_reaction` is a workflow to initialize data for reactive systems of small gas-phase molecules. The workflow was introduced in the "Initialization" section of [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211).

To start the workflow, one needs a box containing reactive systems. The following packages are required for each of the steps:
- Exploring: [LAMMPS](https://github.com/lammps/lammps)
- Sampling: [MDDatasetBuilder](https://github.com/tongzhugroup/mddatasetbuilder)
- Labeling: [Gaussian](https://gaussian.com/)

The Exploring step uses LAMMPS [pair_style reaxff](https://docs.lammps.org/latest/pair_reaxff.html) to run a short ReaxFF NVT MD simulation. In the Sampling step, molecular clusters are taken and the k-means clustering algorithm is applied to remove the redundancy, which is described in [Nature Communications, 11, 5713 (2020)](https://doi.org/10.1038/s41467-020-19497-z). The Labeling step calculates energies and forces using the Gaussian package.

An example of `reaction.json` is given below:

```{literalinclude} ../../examples/init/reaction.json
:language: json
:linenos:
```

For detailed parameters, see [parameters](init-reaction-jdata.rst) and [machine parameters](init-reaction-mdata.rst).

The generated data can be used to continue the DP-GEN concurrent learning workflow. Read [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211) for details.
43 changes: 42 additions & 1 deletion dpgen/data/arginfo.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from dargs import Argument
from dargs import Argument, ArgumentEncoder

from dpgen.arginfo import general_mdata_arginfo

Expand Down Expand Up @@ -34,3 +34,44 @@ def init_reaction_mdata_arginfo() -> Argument:
arginfo
"""
return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp"))


def init_reaction_jdata_arginfo() -> Argument:
    """Generate arginfo for dpgen init_reaction jdata.

    The returned schema has the top-level keys ``type_map``, ``reaxff``
    (a nested dict holding the ReaxFF MD settings), ``cutoff``,
    ``dataset_size`` and ``qmkeywords``.

    Returns
    -------
    Argument
        dpgen init_reaction jdata arginfo
    """
    doc_init_reaction = "Generate initial data for reactive systems for small gas-phase molecules, from a ReaxFF NVT MD trajectory."
    doc_type_map = "Type map, which should match types in the initial data. e.g. [\"C\", \"H\", \"O\"]"
    doc_reaxff = "Parameters for ReaxFF NVT MD."
    doc_data = "Path to initial LAMMPS data file. The atom_style should be charge."
    doc_ff = "Path to ReaxFF force field file. Available in the lammps/potentials directory."
    doc_control = "Path to ReaxFF control file."
    doc_temp = "Target Temperature for the NVT MD simulation. Unit: K."
    doc_dt = "Real time for every time step. Unit: fs."
    # The original sentence was cut off after "temperature".
    doc_tau_t = "Time to determine how rapidly the temperature is relaxed. Unit: fs."
    doc_dump_freq = "Frequency of time steps to collect trajectory."
    doc_nstep = "Total steps to run the ReaxFF MD simulation."
    doc_cutoff = "Cutoff radius to take clusters from the trajectory. Note that only a complete molecule or free radical will be taken."
    doc_dataset_size = "Collected dataset size for each bond type."
    doc_qmkeywords = "Gaussian keywords for first-principle calculations. e.g. force mn15/6-31g** Geom=PrintInputOrient. Note that \"force\" job is necessary to collect data. Geom=PrintInputOrient should be used when there are more than 50 atoms in a cluster."

    # Settings of the ReaxFF exploration run, nested under the "reaxff" key.
    reaxff_args = [
        Argument("data", str, doc=doc_data),
        Argument("ff", str, doc=doc_ff),
        Argument("control", str, doc=doc_control),
        Argument("temp", [float, int], doc=doc_temp),
        Argument("dt", [float, int], doc=doc_dt),
        Argument("tau_t", [float, int], doc=doc_tau_t),
        Argument("dump_freq", int, doc=doc_dump_freq),
        Argument("nstep", int, doc=doc_nstep),
    ]

    return Argument("init_reaction_jdata", dict, [
        Argument("type_map", list, doc=doc_type_map),
        Argument("reaxff", dict, reaxff_args, doc=doc_reaxff),
        # Accept ints as well as floats, like the other real-valued
        # quantities above, so that e.g. ``"cutoff": 4`` is not rejected.
        Argument("cutoff", [float, int], doc=doc_cutoff),
        Argument("dataset_size", int, doc=doc_dataset_size),
        Argument("qmkeywords", str, doc=doc_qmkeywords),
    ], doc=doc_init_reaction)
6 changes: 5 additions & 1 deletion dpgen/data/reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@
from dpgen.dispatcher.Dispatcher import make_submission_compat
from dpgen.remote.decide_machine import convert_mdata
from dpgen.generator.run import create_path, make_fp_task_name
from dpgen.util import sepline
from dpgen.util import sepline, normalize
from .arginfo import init_reaction_jdata_arginfo

reaxff_path = "00.reaxff"
build_path = "01.build"
Expand Down Expand Up @@ -207,6 +208,9 @@ def gen_init_reaction(args):
with open(args.MACHINE, "r") as fp:
mdata = json.load(fp)

jdata_arginfo = init_reaction_jdata_arginfo()
jdata = normalize(jdata_arginfo, jdata)

mdata = convert_mdata(mdata, ["reaxff", "build", "fp"])
record = "record.reaction"
iter_rec = -1
Expand Down
23 changes: 23 additions & 0 deletions dpgen/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from typing import Union, List
from pathlib import Path

from dargs import Argument

from dpgen import dlog

"""
Expand Down Expand Up @@ -47,3 +49,24 @@ def expand_sys_str(root_dir: Union[str, Path]) -> List[str]:
if (root_dir / "type.raw").is_file():
matches.append(str(root_dir))
return matches

def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict:
    """Normalize input data against an argument schema, then check it.

    Parameters
    ----------
    arginfo : dargs.Argument
        schema used both to normalize and to check ``data``
    data : dict
        raw input data
    strict_check : bool, default=True
        forwarded to ``arginfo.check_value`` as its ``strict`` argument

    Returns
    -------
    dict
        the value returned by ``arginfo.normalize_value``
    """
    # Normalize first so that the check runs on the normalized result.
    normalized = arginfo.normalize_value(data, trim_pattern="_*")
    arginfo.check_value(normalized, strict=strict_check)
    return normalized
2 changes: 1 addition & 1 deletion examples/init/reaction.json
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,5 @@
},
"cutoff": 3.5,
"dataset_size": 100,
"qmkeywords": "b3lyp/6-31g** force"
"qmkeywords": "b3lyp/6-31g** force Geom=PrintInputOrient"
}
32 changes: 32 additions & 0 deletions tests/test_check_examples.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""This module ensures input in the examples directory
could pass the argument checking.
"""
import unittest
import json
from pathlib import Path

from dpgen.util import normalize
from dpgen.data.arginfo import (
init_reaction_jdata_arginfo,
)

# Schema for the `dpgen init_reaction` parameter file.
init_reaction_jdata = init_reaction_jdata_arginfo()

# The examples directory sits next to the directory holding this file.
p_examples = Path(__file__).parent.parent.joinpath("examples")

# input_files : tuple[tuple[Argument, Path]]
#   (schema, example file) pairs; each example is checked against its schema.
input_files = (
    (init_reaction_jdata, p_examples.joinpath("init", "reaction.json")),
)


class TestExamples(unittest.TestCase):
    """Run every example listed in ``input_files`` through ``normalize``."""

    def test_arguments(self):
        """Load each example JSON file and normalize it with its schema."""
        for arginfo, example_path in input_files:
            label = str(example_path)
            # One sub-test per file, labelled with the file name.
            with self.subTest(fn=label), open(label) as handle:
                normalize(arginfo, json.load(handle))