diff --git a/package/MDAnalysis/topology/TOPParser.py b/package/MDAnalysis/topology/TOPParser.py index 7e45727309b..b20c3120dd2 100644 --- a/package/MDAnalysis/topology/TOPParser.py +++ b/package/MDAnalysis/topology/TOPParser.py @@ -93,7 +93,7 @@ from .tables import Z2SYMB from ..lib.util import openany, FORTRANReader -from .base import TopologyReaderBase +from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, @@ -106,6 +106,7 @@ Resids, Resnums, Segids, + ChainIDs, AtomAttr, Bonds, Angles, @@ -140,6 +141,8 @@ class TOPParser(TopologyReaderBase): - Bonds - Angles - Dihedrals (inc. impropers) + - ChainIDs (from %RESIDUE_CHAINID) + - Segids (from %RESIDUE_CHAINID) The format is defined in `PARM parameter/topology file specification`_. The reader tries to detect if it is a newer @@ -148,6 +151,10 @@ class TOPParser(TopologyReaderBase): .. _`PARM parameter/topology file specification`: http://ambermd.org/formats.html#topology + Additionally, the %RESIDUE_CHAINID non-standard flag is supported. This + can be added with the addPDB command from parmed: + https://parmed.github.io/ParmEd/html/parmed.html#addpdb + Notes ----- Elements are obtained from the atomic numbers (if present). 
If a given @@ -188,7 +195,8 @@ def parse(self, **kwargs): "ANGLES_INC_HYDROGEN": (4, 10, self.parse_bonded, "angh", 4), "ANGLES_WITHOUT_HYDROGEN": (4, 10, self.parse_bonded, "anga", 5), "DIHEDRALS_INC_HYDROGEN": (5, 10, self.parse_bonded, "dihh", 6), - "DIHEDRALS_WITHOUT_HYDROGEN": (5, 10, self.parse_bonded, "diha", 7) + "DIHEDRALS_WITHOUT_HYDROGEN": (5, 10, self.parse_bonded, "diha", 7), + "RESIDUE_CHAINID": (1, 20, self.parse_chainids, "segids", 11), } attrs = {} # empty dict for attrs that we'll fill @@ -298,12 +306,21 @@ def next_getter(): attrs['atomids'] = Atomids(np.arange(n_atoms) + 1) attrs['resids'] = Resids(np.arange(n_res) + 1) attrs['resnums'] = Resnums(np.arange(n_res) + 1) - attrs['segids'] = Segids(np.array(['SYSTEM'], dtype=object)) - top = Topology(n_atoms, n_res, 1, + if 'segids' in attrs: + segidx, (segids,) = change_squash((attrs['segids'],), (attrs['segids'],)) + attrs['segids'] = Segids(segids) + attrs['ChainIDs'] = ChainIDs(segids) + n_segs = len(segids) + else: + attrs['segids'] = Segids(np.array(['SYSTEM'], dtype=object)) + segidx = None + n_segs = 1 + + top = Topology(n_atoms, n_res, n_segs, attrs=list(attrs.values()), atom_resindex=residx, - residue_segindex=None) + residue_segindex=segidx) return top @@ -563,9 +580,15 @@ def parsesection_mapper(self, numlines, mapper): A list of all entries in a given parm7 section """ section = [] - y = next(self.topfile).strip("%FORMAT(") - y.strip(")") - x = FORTRANReader(y) + + def get_fmt(file): + if (line := next(file))[:7] == "%FORMAT": + return line[8:].split(")")[0] + else: + return get_fmt(file) + # There may be %COMMENT lines before the %FORMAT statement. Skip them. 
+ fmt = get_fmt(self.topfile) + x = FORTRANReader(fmt) for i in range(numlines): l = next(self.topfile) for j in range(len(x.entries)): @@ -620,3 +643,23 @@ def parse_dihedrals(self, diha, dihh): dihedrals = Dihedrals(dihed) impropers = Impropers(improp) return dihedrals, impropers + + def parse_chainids(self, num_per_record, numlines): + """Extracts the chainID of each residue + + Parameters + ---------- + num_per_record : int + The number of entries for each record in section (unused input) + numlines : int + The number of lines to be parsed in current section + + Returns + ------- + attr : numpy.ndarray + A :class:`numpy.ndarray` containing the chainID of each residue + as defined in the parm7 file + """ + vals = self.parsesection_mapper(numlines, lambda x: x) + attr = np.array(vals) + return attr