Skip to content

Commit

Permalink
Implement Support for pymatgen.core.Molecule (#200)
Browse files Browse the repository at this point in the history
* add .github/workflows/mirror_gitee.yml

* implement support for pymatgen.core.Molecule

* implement support for pymatgen.core.Molecule

* Change atom_types in dpdata/pymatgen/molecule.py

* Change doc string in dpdata/plugins/pymatgen.py

* add dpdata/pymatgen to setup.py

* modify molecule.py

* modify molecule.py

* modify molecule.py

* Support of pymatgen.Molecule in dpdata

* Update tests/test_pymatgen_molecule.py

* Moved "remove_pbc" from pymatgen/molecule.py to system.py
  • Loading branch information
tuoping authored Oct 18, 2021
1 parent 3e96c2c commit 9b35c81
Show file tree
Hide file tree
Showing 10 changed files with 170 additions and 13 deletions.
35 changes: 35 additions & 0 deletions dpdata/plugins/pymatgen.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from dpdata.format import Format
import dpdata.pymatgen.molecule
import numpy as np


@Format.register("pymatgen/structure")
Expand All @@ -22,6 +24,37 @@ def to_system(self, data, **kwargs):
return structures


@Format.register("pymatgen/molecule")
class PyMatgenMoleculeFormat(Format):
    """Format plugin converting between system data and pymatgen Molecule objects."""

    @Format.post("remove_pbc")
    def from_system(self, file_name, **kwargs):
        """Read a molecule file into a system data dict.

        Parameters
        ----------
        file_name : path of the file handed to
            ``dpdata.pymatgen.molecule.to_system_data``
        **kwargs : accepted for interface compatibility; not used here

        Returns
        -------
        dict
            the system data produced by ``to_system_data``

        Raises
        ------
        ImportError
            if ``pymatgen.core`` cannot be imported
        """
        try:
            # Imported only to fail early with a clear message when pymatgen
            # is not installed; the name itself is not used in this method.
            from pymatgen.core import Molecule  # noqa: F401
        except ModuleNotFoundError as e:
            raise ImportError('No module pymatgen.Molecule') from e

        return dpdata.pymatgen.molecule.to_system_data(file_name)

    def to_system(self, data, **kwargs):
        """Convert system data to a list of pymatgen Molecule objects.

        Parameters
        ----------
        data : dict with at least 'atom_names', 'atom_types', 'coords'
            and 'cells'
        **kwargs : accepted for interface compatibility; not used here

        Returns
        -------
        list
            one ``Molecule`` per frame of ``data['coords']``

        Raises
        ------
        ImportError
            if ``pymatgen.core`` cannot be imported
        """
        try:
            from pymatgen.core import Molecule
        except ModuleNotFoundError as e:
            raise ImportError('No module pymatgen.Molecule') from e

        # Build the per-atom element list from atom_types so that each
        # element lines up with its coordinate row even when atoms of the
        # same type are not stored contiguously.
        atom_names = data['atom_names']
        species = [atom_names[itype] for itype in data['atom_types']]

        # remove_pbc rewrites 'coords' and 'cells' in place, so hand it
        # copies and leave the caller's data untouched.
        shifted = dict(data)
        shifted['coords'] = np.array(data['coords'])
        shifted['cells'] = np.array(data['cells'])
        shifted = dpdata.system.remove_pbc(shifted)

        return [Molecule(species, frame) for frame in shifted['coords']]


@Format.register("pymatgen/computedstructureentry")
@Format.register_to("to_pymatgen_ComputedStructureEntry")
class PyMatgenCSEFormat(Format):
Expand All @@ -44,3 +77,5 @@ def to_labeled_system(self, data, *args, **kwargs):
entry = ComputedStructureEntry(structure, energy, data=csedata)
entries.append(entry)
return entries


Empty file added dpdata/pymatgen/__init__.py
Empty file.
26 changes: 26 additions & 0 deletions dpdata/pymatgen/molecule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import numpy as np
from pymatgen.core import Molecule
from collections import Counter
import dpdata

def to_system_data(file_name, protect_layer = 9) :
    """Load a molecule file with pymatgen and return it as a system data dict.

    Parameters
    ----------
    file_name : path passed to ``Molecule.from_file``
    protect_layer : kept for interface compatibility; not used by this
        function

    Returns
    -------
    dict
        with keys 'atom_names', 'atom_numbs', 'atom_types', 'orig',
        'coords' and 'cells'. 'coords' and 'cells' each hold a single
        frame; 'cells' is a fixed ``10.0 * np.eye(3)`` placeholder.
    """
    mol = Molecule.from_file(file_name)
    # Element symbol of every site, in file order (first element of the
    # site's species).
    elem_mol = [str(site.species.elements[0]) for site in mol.sites]
    elem_counter = Counter(elem_mol)
    atom_names = list(elem_counter.keys())
    atom_numbs = list(elem_counter.values())
    # Map each element name to its type index once instead of searching
    # the name list for every atom.
    type_index = {name: idx for idx, name in enumerate(atom_names)}
    atom_types = [type_index[elem] for elem in elem_mol]

    system = {}
    system['atom_names'] = atom_names
    system['atom_numbs'] = atom_numbs
    system['atom_types'] = np.array(atom_types, dtype = int)
    system['orig'] = np.array([0, 0, 0])

    # Stack the single frame into arrays with a leading frame axis so that
    # consumers relying on array attributes (e.g. ``.shape``) work.
    system['coords'] = np.array([np.copy(mol.cart_coords)])
    system['cells'] = np.array([10.0 * np.eye(3)])
    return system
32 changes: 19 additions & 13 deletions dpdata/system.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from monty.serialization import loadfn,dumpfn
from dpdata.periodic_table import Element
from dpdata.amber.mask import pick_by_amber_mask, load_param_file
import dpdata

# ensure all plugins are loaded!
import dpdata.plugins
Expand Down Expand Up @@ -418,7 +419,7 @@ def extend(self, systems):
for system in systems:
self.append(system.copy())


def apply_pbc(self) :
"""
Append periodic boundary condition
Expand All @@ -428,6 +429,7 @@ def apply_pbc(self) :
self.data['coords'] = np.matmul(ncoord, self.data['cells'])


@post_funcs.register("remove_pbc")
def remove_pbc(self, protect_layer = 9):
"""
This method does NOT delete the definition of the cells, it
Expand All @@ -441,19 +443,8 @@ def remove_pbc(self, protect_layer = 9):
protect_layer : the protect layer between the atoms and the cell
boundary
"""
nframes = self.get_nframes()
natoms = self.get_natoms()
assert(protect_layer >= 0), "the protect_layer should be no less than 0"
for ff in range(nframes):
tmpcoord = self.data['coords'][ff]
cog = np.average(tmpcoord, axis = 0)
dist = tmpcoord - np.tile(cog, [natoms, 1])
max_dist = np.max(np.linalg.norm(dist, axis = 1))
h_cell_size = max_dist + protect_layer
cell_size = h_cell_size * 2
shift = np.array([1,1,1]) * h_cell_size - cog
self.data['coords'][ff] = self.data['coords'][ff] + np.tile(shift, [natoms, 1])
self.data['cells'][ff] = cell_size * np.eye(3)
remove_pbc(self.data, protect_layer)

def affine_map(self, trans, f_idx = 0) :
assert(np.linalg.det(trans) != 0)
Expand Down Expand Up @@ -1314,3 +1305,18 @@ def elements_index_map(elements,standard=False,inverse=False):
else:
return dict(zip(elements,range(len(elements))))
# %%

def remove_pbc(system, protect_layer = 9):
    """
    Re-center every frame inside a cubic cell large enough to hold it.

    For each frame the unweighted centroid of the coordinates is computed,
    the half cell size is set to the largest atom-to-centroid distance plus
    `protect_layer`, the coordinates are shifted so that the centroid sits
    at the cell center, and the cell is replaced by a cubic cell of twice
    the half size.

    Parameters
    ----------
    system : dict with per-frame 'coords' and 'cells' entries; both are
        modified in place
    protect_layer : the protect layer between the atoms and the cell
        boundary

    Returns
    -------
    dict
        the same `system` object that was passed in
    """
    # Iterating over the frames directly keeps a system with zero frames a
    # no-op instead of failing on system['coords'][0].
    for ff in range(len(system['coords'])):
        frame = np.asarray(system['coords'][ff])
        cog = np.average(frame, axis = 0)
        # Broadcasting subtracts the centroid from every atom row.
        max_dist = np.max(np.linalg.norm(frame - cog, axis = 1))
        h_cell_size = max_dist + protect_layer
        cell_size = h_cell_size * 2
        shift = np.array([1,1,1]) * h_cell_size - cog
        system['coords'][ff] = frame + shift
        system['cells'][ff] = cell_size * np.eye(3)
    return system
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
'dpdata/abacus',
'dpdata/rdkit',
'dpdata/plugins',
'dpdata/pymatgen',
],
package_data={'dpdata':['*.json']},
classifiers=[
Expand Down
16 changes: 16 additions & 0 deletions tests/pymatgen/FA-001.vasp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
C1 H5 N2
1.0
2.2504659203492643e+01 0.0000000000000000e+00 0.0000000000000000e+00
0.0000000000000000e+00 2.2504659203492643e+01 0.0000000000000000e+00
0.0000000000000000e+00 0.0000000000000000e+00 2.2504659203492643e+01
C H N
1 5 2
Cartesian
11.2523296017 11.9178548890 11.2108422059
11.2523296017 11.4836546020 13.4873419539
11.2523296017 9.9657546120 12.5961418889
11.2523296017 11.5757547460 9.0233422059
11.2523296017 9.9209545690 10.0419421929
11.2523296017 13.0578542790 11.2108422059
11.2523296017 11.0671545110 12.4584418079
11.2523296017 11.0296546060 9.9897423529
10 changes: 10 additions & 0 deletions tests/pymatgen/FA-001.xyz
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
8
Lattice="10 0.0 0.0 0.0 10 0.0 0.0 0.0 10" Properties=species:S:1:pos:R:3:Z:I:1
C 3.1742845125747904e-16 5.342900276 5.184000015 6
H 4.568238577398768e-16 4.908699989 7.460499763 1
N 3.938218942178196e-16 4.492199898 6.431599617 7
H 4.022535923897687e-16 3.390799999 6.569299698 1
H 1.834827076007373e-16 5.000800133 2.996500015 1
N 2.4265764993669136e-16 4.454699993 3.962900162 7
H 2.4585396828529157e-16 3.345999956 4.015100002 1
H 3.1742845125747904e-16 6.482899666 5.184000015 1
16 changes: 16 additions & 0 deletions tests/pymatgen/mol2-new.vasp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
H5 C1 N2
1.0
10.000000 0.000000 0.000000
0.000000 10.000000 0.000000
0.000000 0.000000 10.000000
H C N
5 1 2
direct
0.577383 0.631337 0.500367 H
0.440363 0.395639 0.623009 H
0.523296 0.528867 0.297597 H
0.521491 0.528803 0.702604 H
0.441477 0.395690 0.376400 H
0.521985 0.537275 0.500106 C
0.490941 0.484747 0.383775 N
0.489919 0.484705 0.616136 N
16 changes: 16 additions & 0 deletions tests/pymatgen/mol2.vasp
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
hexagonal-PbI3
1.00000000000000
8.4261398403369476 -0.0207094818066008 -0.0031014314051307
-4.2326852079273163 7.4079517949230027 0.0021007606821090
-0.0033015270376727 0.0018001191064931 7.9750946862221301
N C H
2 1 5
Direct
0.3230984076298932 0.6454769144393403 0.1041651474646342
0.3220267174078323 0.6453279258574751 0.3955245499807875
0.3958479883967422 0.7164380537464232 0.2500334235810864
0.5257412318253072 0.8435919770693032 0.2503606409552951
0.2024793011832460 0.5249247262853158 0.4041424751787161
0.3915235842243226 0.7051542887367509 -0.0038936922276933 0.9961063077723067
0.3896311952726235 0.7049043576463694 0.5039461714481914
0.2036571496859804 0.5250922648687301 0.0949178563751044
31 changes: 31 additions & 0 deletions tests/test_pymatgen_molecule.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import os
import numpy as np
import unittest
from context import dpdata

class TestPOSCARCart(unittest.TestCase):
    """Checks for the pymatgen/molecule format reader and writer."""

    def setUp(self):
        # Load the xyz fixture through the pymatgen molecule reader and
        # check the per-atom type indices it produced.
        xyz_path = os.path.join('pymatgen', 'FA-001.xyz')
        self.system = dpdata.System()
        self.system.from_pymatgen_molecule(xyz_path)
        expected_types = [0, 1, 2, 1, 1, 2, 1, 1]
        self.assertEqual(list(self.system["atom_types"]), expected_types)

    @staticmethod
    def _max_radius(coords, natoms):
        """Return the largest distance from any atom to the centroid."""
        centroid = np.average(coords, axis = 0)
        offsets = coords - np.tile(centroid, [natoms, 1])
        return np.max(np.linalg.norm(offsets, axis = 1))

    def test_poscar_to_molecule(self):
        # The molecule's extent around its centroid must survive the
        # conversion from a POSCAR-loaded system to a pymatgen Molecule.
        poscar_system = dpdata.System()
        poscar_system.from_vasp_poscar(os.path.join('pymatgen', 'mol2.vasp'))
        first_frame = poscar_system['coords'][0]
        natoms = len(first_frame)
        radius_before = self._max_radius(first_frame, natoms)

        molecules = poscar_system.to("pymatgen/molecule")
        radius_after = self._max_radius(molecules[-1].cart_coords, natoms)
        self.assertAlmostEqual(radius_before, radius_after)


if __name__ == '__main__':
    unittest.main()

0 comments on commit 9b35c81

Please sign in to comment.