diff --git a/dpdata/plugins/pymatgen.py b/dpdata/plugins/pymatgen.py index 84d71f0e4..f29ac382b 100644 --- a/dpdata/plugins/pymatgen.py +++ b/dpdata/plugins/pymatgen.py @@ -1,4 +1,6 @@ from dpdata.format import Format +import dpdata.pymatgen.molecule +import numpy as np @Format.register("pymatgen/structure") @@ -22,6 +24,37 @@ def to_system(self, data, **kwargs): return structures +@Format.register("pymatgen/molecule") +class PyMatgenMoleculeFormat(Format): + @Format.post("remove_pbc") + def from_system(self, file_name, **kwargs): + try: + from pymatgen.core import Molecule + except ModuleNotFoundError as e: + raise ImportError('No module pymatgen.Molecule') from e + + return dpdata.pymatgen.molecule.to_system_data(file_name) + + def to_system(self, data, **kwargs): + """convert System to Pymatgen Molecule obj + """ + molecules = [] + try: + from pymatgen.core import Molecule + except ModuleNotFoundError as e: + raise ImportError('No module pymatgen.Molecule') from e + + species = [] + for name, numb in zip(data['atom_names'], data['atom_numbs']): + species.extend([name]*numb) + data = dpdata.system.remove_pbc(data) + for ii in range(np.array(data['coords']).shape[0]): + molecule = Molecule( + species, data['coords'][ii]) + molecules.append(molecule) + return molecules + + @Format.register("pymatgen/computedstructureentry") @Format.register_to("to_pymatgen_ComputedStructureEntry") class PyMatgenCSEFormat(Format): @@ -44,3 +77,5 @@ def to_labeled_system(self, data, *args, **kwargs): entry = ComputedStructureEntry(structure, energy, data=csedata) entries.append(entry) return entries + + diff --git a/dpdata/pymatgen/__init__.py b/dpdata/pymatgen/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/dpdata/pymatgen/molecule.py b/dpdata/pymatgen/molecule.py new file mode 100644 index 000000000..f671d6ea8 --- /dev/null +++ b/dpdata/pymatgen/molecule.py @@ -0,0 +1,26 @@ +import numpy as np +from pymatgen.core import Molecule +from collections import Counter +import dpdata + +def to_system_data(file_name, protect_layer = 9) : + mol = Molecule.from_file(file_name) + elem_mol = list(str(site.species.elements[0]) for site in mol.sites) + elem_counter = Counter(elem_mol) + atom_names = list(elem_counter.keys()) + atom_numbs = list(elem_counter.values()) + atom_types = [list(atom_names).index(e) for e in elem_mol] + natoms = np.sum(atom_numbs) + + tmpcoord = np.copy(mol.cart_coords) + + system = {} + system['atom_names'] = atom_names + system['atom_numbs'] = atom_numbs + system['atom_types'] = np.array(atom_types, dtype = int) + # center = [c - h_cell_size for c in mol.center_of_mass] + system['orig'] = np.array([0, 0, 0]) + + system['coords'] = [tmpcoord] + system['cells'] = [10.0 * np.eye(3)] + return system diff --git a/dpdata/system.py b/dpdata/system.py index 00185b1a6..9af446d2c 100644 --- a/dpdata/system.py +++ b/dpdata/system.py @@ -9,6 +9,7 @@ from monty.serialization import loadfn,dumpfn from dpdata.periodic_table import Element from dpdata.amber.mask import pick_by_amber_mask, load_param_file +import dpdata # ensure all plugins are loaded! import dpdata.plugins @@ -418,7 +419,7 @@ def extend(self, systems): for system in systems: self.append(system.copy()) - + def apply_pbc(self) : """ Append periodic boundary condition @@ -428,6 +429,7 @@ def apply_pbc(self) : self.data['coords'] = np.matmul(ncoord, self.data['cells']) + @post_funcs.register("remove_pbc") def remove_pbc(self, protect_layer = 9): """ This method does NOT delete the definition of the cells, it @@ -441,19 +443,8 @@ def remove_pbc(self, protect_layer = 9): protect_layer : the protect layer between the atoms and the cell boundary """ - nframes = self.get_nframes() - natoms = self.get_natoms() assert(protect_layer >= 0), "the protect_layer should be no less than 0" - for ff in range(nframes): - tmpcoord = self.data['coords'][ff] - cog = np.average(tmpcoord, axis = 0) - dist = tmpcoord - np.tile(cog, [natoms, 1]) - max_dist = np.max(np.linalg.norm(dist, axis = 1)) - h_cell_size = max_dist + protect_layer - cell_size = h_cell_size * 2 - shift = np.array([1,1,1]) * h_cell_size - cog - self.data['coords'][ff] = self.data['coords'][ff] + np.tile(shift, [natoms, 1]) - self.data['cells'][ff] = cell_size * np.eye(3) + remove_pbc(self.data, protect_layer) def affine_map(self, trans, f_idx = 0) : assert(np.linalg.det(trans) != 0) @@ -1314,3 +1305,18 @@ def elements_index_map(elements,standard=False,inverse=False): else: return dict(zip(elements,range(len(elements)))) # %% + +def remove_pbc(system, protect_layer = 9): + nframes = len(system["coords"]) + natoms = len(system['coords'][0]) + for ff in range(nframes): + tmpcoord = system['coords'][ff] + cog = np.average(tmpcoord, axis = 0) + dist = tmpcoord - np.tile(cog, [natoms, 1]) + max_dist = np.max(np.linalg.norm(dist, axis = 1)) + h_cell_size = max_dist + protect_layer + cell_size = h_cell_size * 2 + shift = np.array([1,1,1]) * h_cell_size - cog + system['coords'][ff] = system['coords'][ff] + np.tile(shift, [natoms, 1]) + system['cells'][ff] = cell_size * np.eye(3) + return system diff --git a/setup.py b/setup.py index 99cac1219..428a2bfe7 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ 'dpdata/abacus', 'dpdata/rdkit', 'dpdata/plugins', + 'dpdata/pymatgen', ], package_data={'dpdata':['*.json']}, classifiers=[ diff --git a/tests/pymatgen/FA-001.vasp b/tests/pymatgen/FA-001.vasp new file mode 100644 index 000000000..1b267adf9 --- /dev/null +++ b/tests/pymatgen/FA-001.vasp @@ -0,0 +1,16 @@ +C1 H5 N2 +1.0 +2.2504659203492643e+01 0.0000000000000000e+00 0.0000000000000000e+00 +0.0000000000000000e+00 2.2504659203492643e+01 0.0000000000000000e+00 +0.0000000000000000e+00 0.0000000000000000e+00 2.2504659203492643e+01 +C H N +1 5 2 +Cartesian + 11.2523296017 11.9178548890 11.2108422059 + 11.2523296017 11.4836546020 13.4873419539 + 11.2523296017 9.9657546120 12.5961418889 + 11.2523296017 11.5757547460 9.0233422059 + 11.2523296017 9.9209545690 10.0419421929 + 11.2523296017 13.0578542790 11.2108422059 + 11.2523296017 11.0671545110 12.4584418079 + 11.2523296017 11.0296546060 9.9897423529 diff --git a/tests/pymatgen/FA-001.xyz b/tests/pymatgen/FA-001.xyz new file mode 100644 index 000000000..ae5b55695 --- /dev/null +++ b/tests/pymatgen/FA-001.xyz @@ -0,0 +1,10 @@ +8 +Lattice="10 0.0 0.0 0.0 10 0.0 0.0 0.0 10" Properties=species:S:1:pos:R:3:Z:I:1 +C 3.1742845125747904e-16 5.342900276 5.184000015 6 +H 4.568238577398768e-16 4.908699989 7.460499763 1 +N 3.938218942178196e-16 4.492199898 6.431599617 7 +H 4.022535923897687e-16 3.390799999 6.569299698 1 +H 1.834827076007373e-16 5.000800133 2.996500015 1 +N 2.4265764993669136e-16 4.454699993 3.962900162 7 +H 2.4585396828529157e-16 3.345999956 4.015100002 1 +H 3.1742845125747904e-16 6.482899666 5.184000015 1 diff --git a/tests/pymatgen/mol2-new.vasp b/tests/pymatgen/mol2-new.vasp new file mode 100644 index 000000000..22db2fa9a --- /dev/null +++ b/tests/pymatgen/mol2-new.vasp @@ -0,0 +1,16 @@ +H5 C1 N2 +1.0 +10.000000 0.000000 0.000000 +0.000000 10.000000 0.000000 +0.000000 0.000000 10.000000 +H C N +5 1 2 +direct +0.577383 0.631337 0.500367 H +0.440363 0.395639 0.623009 H +0.523296 0.528867 0.297597 H +0.521491 0.528803 0.702604 H +0.441477 0.395690 0.376400 H +0.521985 0.537275 0.500106 C +0.490941 0.484747 0.383775 N +0.489919 0.484705 0.616136 N diff --git a/tests/pymatgen/mol2.vasp b/tests/pymatgen/mol2.vasp new file mode 100644 index 000000000..ea4ec2baf --- /dev/null +++ b/tests/pymatgen/mol2.vasp @@ -0,0 +1,16 @@ +hexagonal-PbI3 + 1.00000000000000 + 8.4261398403369476 -0.0207094818066008 -0.0031014314051307 + -4.2326852079273163 7.4079517949230027 0.0021007606821090 + -0.0033015270376727 0.0018001191064931 7.9750946862221301 + N C H + 2 1 5 +Direct + 0.3230984076298932 0.6454769144393403 0.1041651474646342 + 0.3220267174078323 0.6453279258574751 0.3955245499807875 + 0.3958479883967422 0.7164380537464232 0.2500334235810864 + 0.5257412318253072 0.8435919770693032 0.2503606409552951 + 0.2024793011832460 0.5249247262853158 0.4041424751787161 + 0.3915235842243226 0.7051542887367509 -0.0038936922276933 0.9961063077723067 + 0.3896311952726235 0.7049043576463694 0.5039461714481914 + 0.2036571496859804 0.5250922648687301 0.0949178563751044 diff --git a/tests/test_pymatgen_molecule.py b/tests/test_pymatgen_molecule.py new file mode 100644 index 000000000..4a1eb0291 --- /dev/null +++ b/tests/test_pymatgen_molecule.py @@ -0,0 +1,31 @@ +import os +import numpy as np +import unittest +from context import dpdata + +class TestPOSCARCart(unittest.TestCase): + + def setUp(self): + self.system = dpdata.System() + self.system.from_pymatgen_molecule(os.path.join('pymatgen', 'FA-001.xyz')) + self.assertEqual(list(self.system["atom_types"]), [0, 1, 2, 1, 1, 2, 1, 1]) + + def test_poscar_to_molecule(self): + tmp_system = dpdata.System() + tmp_system.from_vasp_poscar(os.path.join('pymatgen', 'mol2.vasp')) + natoms = len(tmp_system['coords'][0]) + tmpcoord = tmp_system['coords'][0] + cog = np.average(tmpcoord, axis = 0) + dist = tmpcoord - np.tile(cog, [natoms, 1]) + max_dist_0 = np.max(np.linalg.norm(dist, axis = 1)) + + mols = tmp_system.to("pymatgen/molecule") + cog = np.average(mols[-1].cart_coords, axis = 0) + dist = mols[-1].cart_coords - np.tile(cog, [natoms, 1]) + max_dist_1 = np.max(np.linalg.norm(dist, axis = 1)) + self.assertAlmostEqual(max_dist_0, max_dist_1) + + + +if __name__ == '__main__': + unittest.main()