diff --git a/doc/index.rst b/doc/index.rst index 6eea4d95b..04f006883 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -34,6 +34,8 @@ DPGEN's documentation init/init-bulk-mdata init/init-surf-mdata + init/init-reaction + init/init-reaction-jdata init/init-reaction-mdata .. _autotest:: diff --git a/doc/init/init-reaction-jdata.rst b/doc/init/init-reaction-jdata.rst new file mode 100644 index 000000000..253cae682 --- /dev/null +++ b/doc/init/init-reaction-jdata.rst @@ -0,0 +1,6 @@ +dpgen init_reaction parameters +====================================== + +.. dargs:: + :module: dpgen.data.arginfo + :func: init_reaction_jdata_arginfo diff --git a/doc/init/init-reaction.md b/doc/init/init-reaction.md new file mode 100644 index 000000000..c7366dcbf --- /dev/null +++ b/doc/init/init-reaction.md @@ -0,0 +1,21 @@ +# init_reaction + +`dpgen init_reaction` is a workflow to initialize data for reactive systems of small gas-phase molecules. The workflow was introduced in the "Initialization" section of [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211). + +To start the workflow, one needs a box containing reactive systems. The following packages are required for each of the steps: +- Exploring: [LAMMPS](https://github.com/lammps/lammps) +- Sampling: [MDDatasetBuilder](https://github.com/tongzhugroup/mddatasetbuilder) +- Labeling: [Gaussian](https://gaussian.com/) + +The Exploring step uses LAMMPS [pair_style reaxff](https://docs.lammps.org/latest/pair_reaxff.html) to run a short ReaxMD NVT MD simulation. In the Sampling step, molecular clusters are taken and the k-means clustering algorithm is applied to remove the redundancy, which is described in [Nature Communications, 11, 5713 (2020)](https://doi.org/10.1038/s41467-020-19497-z). The Labeling step calculates energies and forces using the Gaussian package. 
+ +An example of `reaction.json` is given below: + +```{literalinclude} ../../examples/init/reaction.json +:language: json +:linenos: +``` + +For detailed parameters, see [parameters](init-reaction-jdata.rst) and [machine parameters](init-reaction-mdata.rst). + +The generated data can be used to continue the DP-GEN concurrent learning workflow. Read [Energy & Fuels, 2021, 35 (1), 762–769](https://doi.org/10.1021/acs.energyfuels.0c03211) for details. diff --git a/dpgen/data/arginfo.py b/dpgen/data/arginfo.py index d5814c036..fea20ae65 100644 --- a/dpgen/data/arginfo.py +++ b/dpgen/data/arginfo.py @@ -1,4 +1,4 @@ -from dargs import Argument +from dargs import Argument, ArgumentEncoder from dpgen.arginfo import general_mdata_arginfo @@ -34,3 +34,44 @@ def init_reaction_mdata_arginfo() -> Argument: arginfo """ return general_mdata_arginfo("init_reaction_mdata", ("reaxff", "build", "fp")) + + +def init_reaction_jdata_arginfo() -> Argument: + """Generate arginfo for dpgen init_reaction jdata. + + Returns + ------- + Argument + dpgen init_reaction jdata arginfo + """ + doc_init_reaction = "Generate initial data for reactive systems for small gas-phase molecules, from a ReaxFF NVT MD trajectory." + doc_type_map = "Type map, which should match types in the initial data. e.g. [\"C\", \"H\", \"O\"]" + doc_reaxff = "Parameters for ReaxFF NVT MD." + doc_data = "Path to initial LAMMPS data file. The atom_style should be charge." + doc_ff = "Path to ReaxFF force field file. Available in the lammps/potentials directory." + doc_control = "Path to ReaxFF control file." + doc_temp = "Target Temperature for the NVT MD simulation. Unit: K." + doc_dt = "Real time for every time step. Unit: fs." + doc_tau_t = "Time to determine how rapidly the temperature. Unit: fs." + doc_dump_frep = "Frequency of time steps to collect trajectory." + doc_nstep = "Total steps to run the ReaxFF MD simulation." + doc_cutoff = "Cutoff radius to take clusters from the trajectory. 
Note that only a complete molecule or free radical will be taken." + doc_dataset_size = "Collected dataset size for each bond type." + doc_qmkeywords = "Gaussian keywords for first-principle calculations. e.g. force mn15/6-31g** Geom=PrintInputOrient. Note that \"force\" job is necessary to collect data. Geom=PrintInputOrient should be used when there are more than 50 atoms in a cluster." + + return Argument("init_reaction_jdata", dict, [ + Argument("type_map", list, doc=doc_type_map), + Argument("reaxff", dict, [ + Argument("data", str, doc=doc_data), + Argument("ff", str, doc=doc_ff), + Argument("control", str, doc=doc_control), + Argument("temp", [float, int], doc=doc_temp), + Argument("dt", [float, int], doc=doc_dt), + Argument("tau_t", [float, int], doc=doc_tau_t), + Argument("dump_freq", int, doc=doc_dump_frep), + Argument("nstep", int, doc=doc_nstep), + ], doc=doc_reaxff), + Argument("cutoff", float, doc=doc_cutoff), + Argument("dataset_size", int, doc=doc_dataset_size), + Argument("qmkeywords", str, doc=doc_qmkeywords), + ], doc=doc_init_reaction) diff --git a/dpgen/data/reaction.py b/dpgen/data/reaction.py index 5e900f9de..51be3b111 100644 --- a/dpgen/data/reaction.py +++ b/dpgen/data/reaction.py @@ -18,7 +18,8 @@ from dpgen.dispatcher.Dispatcher import make_submission_compat from dpgen.remote.decide_machine import convert_mdata from dpgen.generator.run import create_path, make_fp_task_name -from dpgen.util import sepline +from dpgen.util import sepline, normalize +from .arginfo import init_reaction_jdata_arginfo reaxff_path = "00.reaxff" build_path = "01.build" @@ -207,6 +208,9 @@ def gen_init_reaction(args): with open(args.MACHINE, "r") as fp: mdata = json.load(fp) + jdata_arginfo = init_reaction_jdata_arginfo() + jdata = normalize(jdata_arginfo, jdata) + mdata = convert_mdata(mdata, ["reaxff", "build", "fp"]) record = "record.reaction" iter_rec = -1 diff --git a/dpgen/util.py b/dpgen/util.py index 9491cdc30..bab822a83 100644 --- a/dpgen/util.py +++ 
b/dpgen/util.py @@ -3,6 +3,8 @@ from typing import Union, List from pathlib import Path +from dargs import Argument + from dpgen import dlog """ @@ -47,3 +49,24 @@ def expand_sys_str(root_dir: Union[str, Path]) -> List[str]: if (root_dir / "type.raw").is_file(): matches.append(str(root_dir)) return matches + +def normalize(arginfo: Argument, data: dict, strict_check: bool = True) -> dict: + """Normalize and check input data. + + Parameters + ---------- + arginfo : dargs.Argument + argument information + data : dict + input data + strict_check : bool, default=True + strict check data or not + + Returns + ------- + dict + normalized data + """ + data = arginfo.normalize_value(data, trim_pattern="_*") + arginfo.check_value(data, strict=strict_check) + return data diff --git a/examples/init/reaction.json b/examples/init/reaction.json index 46e327a9f..12322333c 100644 --- a/examples/init/reaction.json +++ b/examples/init/reaction.json @@ -15,5 +15,5 @@ }, "cutoff": 3.5, "dataset_size": 100, - "qmkeywords": "b3lyp/6-31g** force" + "qmkeywords": "b3lyp/6-31g** force Geom=PrintInputOrient" } \ No newline at end of file diff --git a/tests/test_check_examples.py b/tests/test_check_examples.py new file mode 100644 index 000000000..102e98490 --- /dev/null +++ b/tests/test_check_examples.py @@ -0,0 +1,32 @@ +"""This module ensures input in the examples directory +could pass the argument checking. 
+""" +import unittest +import json +from pathlib import Path + +from dpgen.util import normalize +from dpgen.data.arginfo import ( + init_reaction_jdata_arginfo, +) + +init_reaction_jdata = init_reaction_jdata_arginfo() + +# directory of examples +p_examples = Path(__file__).parent.parent / "examples" + +# input_files : tuple[tuple[Argument, Path]] +# tuple of example list +input_files = ( + (init_reaction_jdata, p_examples / "init" / "reaction.json"), +) + + +class TestExamples(unittest.TestCase): + def test_arguments(self): + for arginfo, fn in input_files: + fn = str(fn) + with self.subTest(fn=fn): + with open(fn) as f: + data = json.load(f) + normalize(arginfo, data)