Merge pull request #216 from amcadmus/master

Merge recent development on devel into master
deepmodeling · Nov 14, 2021 · 1d1084d · 1d1084d
2 parents ea32d45 + f1859eb
commit 1d1084d
Show file tree

Hide file tree

Showing 139 changed files with 9,257 additions and 256 deletions.
diff --git a/.github/workflows/mirror_gitee.yml b/.github/workflows/mirror_gitee.yml
@@ -0,0 +1,19 @@
+# Workflow that mirrors the GitHub repository to its Gitee counterpart.
+name: Mirror to Gitee Repo
+
+# Triggered by push, delete and create events.
+on: [ push, delete, create ]
+
+# Ensures that only one mirror task will run at a time.
+concurrency:
+  group: git-mirror
+
+jobs:
+  git-mirror:
+    runs-on: ubuntu-latest
+    steps:
+      # The mirroring itself is delegated to wearerequired/git-mirror-action.
+      - uses: wearerequired/git-mirror-action@v1
+        env:
+          ORGANIZATION: deepmodeling
+          # The SSH key is taken from the SYNC_GITEE_PRIVATE_KEY secret.
+          SSH_PRIVATE_KEY: ${{ secrets.SYNC_GITEE_PRIVATE_KEY }}
+        with:
+          source-repo: "https://github.com/deepmodeling/dpdata.git"
+          destination-repo: "git@gitee.com:deepmodeling/dpdata.git"
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -23,7 +23,7 @@ jobs:
     - name: Install rdkit
       run: conda create -c conda-forge -n my-rdkit-env python=${{ matrix.python-version }} rdkit openbabel;
     - name: Install dependencies
-      run: source $CONDA/bin/activate my-rdkit-env && pip install .[amber] coverage codecov
+      run: source $CONDA/bin/activate my-rdkit-env && pip install .[amber,ase,pymatgen] coverage codecov
     - name: Test
       run: source $CONDA/bin/activate my-rdkit-env && cd tests && coverage run --source=../dpdata -m unittest && cd .. && coverage combine tests/.coverage && coverage report
     - name: Run codecov

diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-**dpdata** is a python package for manipulating DeePMD-kit, VASP, LAMMPS data formats.
+**dpdata** is a python package for manipulating data formats of software in computational science, including DeePMD-kit, VASP, LAMMPS, GROMACS, Gaussian.
 dpdata only works with python 3.x.
 
 
@@ -80,6 +80,8 @@ The `System` or `LabeledSystem` can be constructed from the following file forma
 | Amber   | multi       | True         | True    | LabeledSystem | 'amber/md'           |
 | Amber/sqm | sqm.out   | False        | False   | System        | 'sqm/out'            |
 | Gromacs | gro         | True         | False   | System        | 'gromacs/gro'        |
+| ABACUS  | STRU        | False        | True    | LabeledSystem | 'abacus/scf'         |
+| ABACUS  | cif         | True         | True    | LabeledSystem | 'abacus/md'          |
 
 
 The class `dpdata.MultiSystems` can read data from a directory that may contain many files of different systems, or from a single xyz file that contains different systems.
@@ -116,7 +118,7 @@ xyz_multi_systems.to_deepmd_raw('./my_deepmd_data/')
 ```
 
 You may also use the following code to parse multi-system:
-```
+```python
 from dpdata import LabeledSystem,MultiSystems
 from glob import glob
 """
@@ -255,7 +257,7 @@ If a valence of 3 is detected on carbon, the formal charge will be assigned to -
 
 # Plugins
 
-One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's crirical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in `setup.py`:
+One can follow [a simple example](plugin_example/) to add their own format by creating and installing plugins. It's critical to add the [Format](dpdata/format.py) class to `entry_points['dpdata.plugins']` in `setup.py`:
 ```py
     entry_points={
         'dpdata.plugins': [

diff --git a/docs/conf.py b/docs/conf.py
@@ -14,13 +14,14 @@
 #
 import os
 import sys
+from datetime import date
 sys.path.insert(0, os.path.abspath('..'))
 
 
 # -- Project information -----------------------------------------------------
 
 project = 'dpdata'
-copyright = '2019, Han Wang'
+copyright = '2019-%d, Deep Modeling ' % date.today().year
 author = 'Han Wang'
 
 # The short X.Y version
@@ -39,11 +40,12 @@
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinxcontrib.napoleon',
+    'sphinx_rtd_theme',
     'sphinx.ext.mathjax',
     'sphinx.ext.viewcode',
-    'sphinx.ext.githubpages',
-    'm2r',
+    'sphinx.ext.intersphinx',
+    'numpydoc',
+    'm2r2',
 ]
 
 # Add any paths that contain templates here, relative to this directory.
@@ -79,7 +81,7 @@
 # The theme to use for HTML and HTML Help pages.  See the documentation for
 # a list of builtin themes.
 #
-html_theme = 'alabaster'
+html_theme = 'sphinx_rtd_theme'
 
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
@@ -90,7 +92,7 @@
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+#html_static_path = ['_static']
 
 # Custom sidebar templates, must be a dictionary that maps document names
 # to template names.
@@ -161,3 +163,18 @@
 
 
 # -- Extension configuration -------------------------------------------------
+def run_apidoc(_):
+    """Generate the API ``.rst`` files by calling ``sphinx.ext.apidoc.main``.
+
+    The single positional argument is ignored. The ``dpdata`` package that
+    sits next to this ``docs`` directory is used as the module path, and the
+    output is written to the ``api`` directory beside this file.
+    """
+    from sphinx.ext.apidoc import main
+
+    conf_dir = os.path.dirname(__file__)
+    # Make the repository root importable.
+    sys.path.append(os.path.join(conf_dir, '..'))
+    docs_root = os.path.abspath(conf_dir)
+    package_dir = os.path.join(docs_root, "..", "dpdata")
+    output_dir = os.path.join(docs_root, "api")
+    apidoc_args = [
+        '-M',
+        '--tocfile', 'api',
+        '-H', 'API documentation',
+        '-o', output_dir,
+        package_dir,
+        '--force',
+    ]
+    main(apidoc_args)
+
+def setup(app):
+    """Register ``run_apidoc`` as a handler for the ``builder-inited`` event of ``app``."""
+    app.connect('builder-inited', run_apidoc)
+
+
+# External object inventories used by sphinx.ext.intersphinx to resolve
+# cross-references. The NumPy documentation is hosted on numpy.org; the old
+# docs.scipy.org/doc/numpy location is no longer the canonical home.
+intersphinx_mapping = {
+    "numpy": ("https://numpy.org/doc/stable/", None),
+    "python": ("https://docs.python.org/3/", None),
+}
diff --git a/docs/index.rst b/docs/index.rst
@@ -10,22 +10,11 @@ Welcome to dpdata's documentation!
    :maxdepth: 2
    :caption: Contents:
 
+   api/api
 
 .. mdinclude:: ../README.md
 
 
-API documentation
-=================
-
-.. automodule:: dpdata
-
-.. autoclass:: System
-    :members: __init__, __getitem__, get_nframes, get_natoms, sub_system, append, apply_pbc, to_lammps_lmp, to_vasp_poscar
-
-.. autoclass:: LabeledSystem
-    :members: __init__, sub_system, to_deepmd_raw, to_deepmd_npy
-
-
 Indices and tables
 ==================
 

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -0,0 +1 @@
+.[docs]
diff --git a/dpdata/abacus/md.py b/dpdata/abacus/md.py
@@ -0,0 +1,192 @@
+import os,sys
+import numpy as np
+from .scf import ry2ev, kbar2evperang3, get_block, get_geometry_in, get_cell, get_coords
+
+# Read in geometries from an ABACUS MD trajectory.
+# The atomic coordinates are read in from generated files in OUT.XXXX.
+# Energies, forces (and stresses, when enabled) are read in from running_md.log in OUT.XXXX.
+# IMPORTANT: by default the program takes the STRU input file as the standard cell information,
+# therefore the direct and Cartesian coordinates read could be different from the ones in
+# the output cif files!!!
+# It is highly recommended to use ORTHOGONAL coordinates in the STRU file if you wish to get
+# the same coordinates in both dpdata and the output cif files.
+
+def get_path_out(fname, inlines):
+    """Return the ``OUT.<suffix>/`` directory located under ``fname``.
+
+    Note that this differs from the same-name function in scf.py: this one
+    returns the base directory of OUT.XXXX.
+
+    ``inlines`` are the lines of the INPUT file. The first line whose leading
+    token is ``suffix`` supplies the suffix; without such a line the suffix
+    ``ABACUS`` is used.
+    """
+    for entry in inlines:
+        tokens = entry.split()
+        if tokens and tokens[0] == "suffix":
+            return os.path.join(fname, "OUT.%s/" % tokens[1])
+    return os.path.join(fname, "OUT.ABACUS/")
+
+def get_coord_dump_freq(inlines):
+    """Return the integer value of ``md_dumpmdfred`` from the INPUT lines.
+
+    The first line whose leading token is ``md_dumpmdfred`` wins; 1 is
+    returned when no such line exists.
+    """
+    keyword = "md_dumpmdfred"
+    for entry in inlines:
+        tokens = entry.split()
+        if tokens and tokens[0] == keyword:
+            return int(tokens[1])
+    return 1
+
+# set up a cell according to cell info in cif file.
+# maybe useful later
+'''
+def setup_cell(a, b, c, alpha, beta, gamma):
+    cell = np.zeros(3, 3)
+    cell[0, 0] = a
+    cell[1, 0] = b*np.cos(gamma/180*np.pi)
+    cell[1, 1] = b*np.sin(gamma/180*np.pi)
+    cell[2, 0] = c*np.cos(beta/180*np.pi)
+    cell[2, 1] = c*(b*np.cos(alpha/180*np.pi) - cell[1, 0]*np.cos(beta/180*np.pi))/cell[1, 1]
+    cell[2, 2] = np.sqrt(c**2 - cell[2, 0]**2 - cell[2, 1]**2)
+    return cell
+'''
+
+def get_single_coord_from_cif(pos_file, atom_names, natoms, cell):
+    """Read the atomic positions of one frame from a cif file.
+
+    Parameters
+    ----------
+    pos_file : path of the cif file to read
+    atom_names : element labels, one per species
+    natoms : number of atoms of each species, aligned with ``atom_names``
+    cell : matrix the coordinates read from the file are right-multiplied by
+
+    Returns
+    -------
+    Array of shape ``(sum(natoms), 3)`` holding ``coordinates @ cell``.
+
+    The six cell parameters found in the file are only checked to be
+    positive; they are not used to build the cell.
+    """
+    assert(len(atom_names) == len(natoms))
+    n_total = sum(natoms)
+    positions = np.zeros([n_total, 3])
+    cell_keys = (
+        "_cell_length_a",
+        "_cell_length_b",
+        "_cell_length_c",
+        "_cell_angle_alpha",
+        "_cell_angle_beta",
+        "_cell_angle_gamma",
+    )
+    cell_params = dict.fromkeys(cell_keys, 0)
+    with open(pos_file, "r") as fp:
+        lines = fp.read().split("\n")
+    for line in lines:
+        for key in cell_keys:
+            if key in line:
+                cell_params[key] = float(line.split()[1])
+    # Every cell parameter must have been found with a positive value.
+    assert(all(cell_params[key] > 0 for key in cell_keys))
+    coord_lines = get_block(lines=lines, keyword="_atom_site_fract_z", skip=0, nlines = n_total)
+
+    # One expected species label per atom, in the order given by atom_names/natoms.
+    species_per_atom = [
+        atom_names[it] for it in range(len(atom_names)) for _ in range(natoms[it])
+    ]
+    for row, species in enumerate(species_per_atom):
+        fields = coord_lines[row].split()
+        assert(fields[0] == species)
+        positions[row, 0] = float(fields[1])
+        positions[row, 1] = float(fields[2])
+        positions[row, 2] = float(fields[3])
+    # important! Coordinates are converted to Cartesian coordinate.
+    return np.matmul(positions, cell)
+
+
+def get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell):
+    """Collect the coordinates of all dumped frames into one array.
+
+    Frame 0 is read from ``STRU_READIN_ADJUST.cif`` and frame ``i`` (i >= 1)
+    from ``md_pos_<i*dump_freq>.cif``, both located in ``path_out``.
+    ``types`` is accepted but not used.
+
+    Returns an array of shape ``(ndump, sum(natoms), 3)``.
+    """
+    n_total = sum(natoms)
+    coords = np.zeros([ndump, n_total, 3])
+    # frame 0 file is different from any other frames
+    frame_files = [os.path.join(path_out, "STRU_READIN_ADJUST.cif")]
+    frame_files.extend(
+        os.path.join(path_out, "md_pos_%d.cif" % (frame * dump_freq))
+        for frame in range(1, ndump)
+    )
+    for frame, cif_path in enumerate(frame_files):
+        coords[frame] = get_single_coord_from_cif(cif_path, atom_names, natoms, cell)
+    return coords
+
+def get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names):
+    """Extract energies, forces and (optionally) stresses from the MD log.
+
+    Parameters
+    ----------
+    outlines : lines of the MD log file
+    inlines : lines of the INPUT file; stress is read only when it contains
+        a line whose first two tokens are ``stress 1``
+    dump_freq : only every ``dump_freq``-th record found in the log is kept
+    ndump : number of frames to allocate storage for
+    natoms : number of atoms of each species
+    atom_names : accepted for interface compatibility, not used
+
+    Returns
+    -------
+    energy : array of shape (ndump,), values taken as printed in the log
+    force : array of shape (ndump, sum(natoms), 3)
+    stress : array of shape (ndump, 3, 3) scaled by ``kbar2evperang3``, or
+        ``None`` when stress output is not enabled in INPUT
+    """
+    total_natoms = sum(natoms)
+    stress = None
+    for line in inlines:
+        tokens = line.split()
+        # Require both tokens so that a bare "stress" line cannot raise IndexError.
+        if len(tokens) >= 2 and tokens[0] == "stress" and tokens[1] == "1":
+            stress = np.zeros([ndump, 3, 3])
+            break
+    if stress is None:
+        print("The ABACUS program has no stress output. Stress will not be read.")
+    nenergy = 0
+    nforce = 0
+    nstress = 0
+    energy = np.zeros(ndump)
+    force = np.zeros([ndump, total_natoms, 3])
+
+    for line_idx, line in enumerate(outlines):
+        if "final etot is" in line:
+            if nenergy % dump_freq == 0:
+                energy[nenergy // dump_freq] = float(line.split()[-2])
+            nenergy += 1
+        if "TOTAL-FORCE (eV/Angstrom)" in line:
+            keep_frame = nforce % dump_freq == 0
+            for iatom in range(total_natoms):
+                # The per-atom rows start five lines below the header.
+                force_line = outlines[line_idx + 5 + iatom]
+                atom_force = [float(i) for i in force_line.split()[1:]]
+                assert(len(atom_force) == 3)
+                if keep_frame:
+                    force[nforce // dump_freq, iatom] = np.array(atom_force)
+            nforce += 1
+            assert(nforce == nenergy)
+        # Skip stress blocks when no storage was allocated for them; indexing
+        # into ``None`` would otherwise raise a TypeError.
+        if stress is not None and "TOTAL-STRESS (KBAR)" in line:
+            keep_frame = nstress % dump_freq == 0
+            for idx in range(3):
+                # The three tensor rows start four lines below the header.
+                stress_line = outlines[line_idx + 4 + idx]
+                single_stress = [float(i) for i in stress_line.split()]
+                if len(single_stress) != 3:
+                    print(single_stress)
+                assert(len(single_stress) == 3)
+                if keep_frame:
+                    stress[nstress // dump_freq, idx] = np.array(single_stress)
+            nstress += 1
+            assert(nstress == nforce)
+    if stress is not None:
+        stress *= kbar2evperang3
+    return energy, force, stress
+
+
+def get_frame (fname):
+    """Build the data dictionary of an ABACUS MD run.
+
+    Parameters
+    ----------
+    fname : str
+        Base directory containing the INPUT file.
+
+    Returns
+    -------
+    dict with the keys ``atom_names``, ``atom_numbs``, ``atom_types``,
+    ``cells``, ``coords``, ``energies``, ``forces``, ``orig`` and, when
+    stress output is enabled in INPUT, ``virials``.
+
+    Raises
+    ------
+    RuntimeError
+        If ``fname`` is not a string.
+    """
+    if not isinstance(fname, str):
+        raise RuntimeError('invalid input')
+    # The single string argument is the base directory containing INPUT.
+    path_in = os.path.join(fname, "INPUT")
+    with open(path_in, 'r') as fp:
+        inlines = fp.read().split('\n')
+    geometry_path_in = get_geometry_in(fname, inlines) # base dir of STRU
+    path_out = get_path_out(fname, inlines)
+
+    with open(geometry_path_in, 'r') as fp:
+        geometry_inlines = fp.read().split('\n')
+    celldm, cell = get_cell(geometry_inlines)
+    # The coordinates returned here are discarded; the frames are read from
+    # the cif files below instead.
+    atom_names, natoms, types, _ = get_coords(celldm, cell, geometry_inlines, inlines)
+    dump_freq = get_coord_dump_freq(inlines = inlines)
+    # Number of dumped geometry files. Counted in-process rather than through
+    # a shell pipeline, so directory names with spaces or shell
+    # metacharacters are handled correctly.
+    ndump = sum(1 for entry in os.listdir(path_out) if 'md_pos_' in entry)
+    coords = get_coords_from_cif(ndump, dump_freq, atom_names, natoms, types, path_out, cell)
+
+    # Read in energies, forces and stresses from the MD log.
+    with open(os.path.join(path_out, "running_md.log"), 'r') as fp:
+        outlines = fp.read().split('\n')
+    energy, force, stress = get_energy_force_stress(outlines, inlines, dump_freq, ndump, natoms, atom_names)
+    if stress is not None:
+        # Scale the stress by the determinant of the cell matrix before
+        # storing it under 'virials'.
+        stress *= np.linalg.det(cell)
+
+    data = {}
+    data['atom_names'] = atom_names
+    data['atom_numbs'] = natoms
+    data['atom_types'] = types
+    # The same cell is used for every frame; one broadcast assignment fills all.
+    data['cells'] = np.zeros([ndump, 3, 3])
+    data['cells'][:, :, :] = cell
+    data['coords'] = coords
+    data['energies'] = energy
+    data['forces'] = force
+    if stress is not None:
+        data['virials'] = stress
+    data['orig'] = np.zeros(3)
+
+    return data
diff --git a/dpdata/abacus/scf.py b/dpdata/abacus/scf.py
@@ -1,11 +1,10 @@
 import os,sys
 import numpy as np
+from ..unit import EnergyConversion, PressureConversion, LengthConversion
 
-bohr2ang = 0.5291770
-ry2ev = 13.605698
-kbar2evperang3 = 1e3 / 1.6021892e6
-# The consts are cited from $ABACUS_ROOT/source/src_global/constant.h
-
+bohr2ang = LengthConversion("bohr", "angstrom").value()
+ry2ev = EnergyConversion("rydberg", "eV").value()
+kbar2evperang3 = PressureConversion("kbar", "eV/angstrom^3").value()
 
 def get_block (lines, keyword, skip = 0, nlines = None):
     ret = []
@@ -175,6 +174,6 @@ def get_frame (fname):
     # print("virial = ", data['virials'])
     return data
 
-if __name__ == "__main__":
-    path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
-    data = get_frame(path)
+#if __name__ == "__main__":
+#    path = "/home/lrx/work/12_ABACUS_dpgen_interface/dpdata/dpdata/tests/abacus.scf"
+#    data = get_frame(path)
diff --git a/dpdata/amber/md.py b/dpdata/amber/md.py
@@ -3,9 +3,11 @@
 from scipy.io import netcdf
 import numpy as np
 from dpdata.amber.mask import pick_by_amber_mask
+from dpdata.unit import EnergyConversion
+from ..periodic_table import ELEMENTS
 
-kcalmol2eV= 0.04336410390059322
-symbols = ['X', 'H', 'He', 'Li', 'Be', 'B', 'C', 'N', 'O', 'F', 'Ne', 'Na', 'Mg', 'Al', 'Si', 'P', 'S', 'Cl', 'Ar', 'K', 'Ca', 'Sc', 'Ti', 'V', 'Cr', 'Mn', 'Fe', 'Co', 'Ni', 'Cu', 'Zn', 'Ga', 'Ge', 'As', 'Se', 'Br', 'Kr', 'Rb', 'Sr', 'Y', 'Zr', 'Nb', 'Mo', 'Tc', 'Ru', 'Rh', 'Pd', 'Ag', 'Cd', 'In', 'Sn', 'Sb', 'Te', 'I', 'Xe', 'Cs', 'Ba', 'La', 'Ce', 'Pr', 'Nd', 'Pm', 'Sm', 'Eu', 'Gd', 'Tb', 'Dy', 'Ho', 'Er', 'Tm', 'Yb', 'Lu', 'Hf', 'Ta', 'W', 'Re', 'Os', 'Ir', 'Pt', 'Au', 'Hg', 'Tl', 'Pb', 'Bi', 'Po', 'At', 'Rn', 'Fr', 'Ra', 'Ac', 'Th', 'Pa', 'U', 'Np', 'Pu', 'Am', 'Cm', 'Bk', 'Cf', 'Es', 'Fm', 'Md', 'No', 'Lr', 'Rf', 'Db', 'Sg', 'Bh', 'Hs', 'Mt', 'Ds', 'Rg', 'Cn', 'Nh', 'Fl', 'Mc', 'Lv', 'Ts', 'Og']
+kcalmol2eV = EnergyConversion("kcal_mol", "eV").value()
+symbols = ['X'] + ELEMENTS
 
 energy_convert = kcalmol2eV
 force_convert = energy_convert