From 36bbbc2991d6caf2edc695bd1bcff24c5e649675 Mon Sep 17 00:00:00 2001 From: khuston Date: Fri, 18 Mar 2016 15:58:56 -0400 Subject: [PATCH 1/4] Fix Merge for issue-363 branch Rewrote mda.Merge to work for new topology style. I also changed the default argument for attrs in core.Topology() because the default argument of None caused an error, whereas using an empty list instead will return a blank Topology. --- package/MDAnalysis/core/topology.py | 2 +- package/MDAnalysis/core/universe.py | 191 ++++++++++++++++++++-------- 2 files changed, 141 insertions(+), 52 deletions(-) diff --git a/package/MDAnalysis/core/topology.py b/package/MDAnalysis/core/topology.py index ea2d0ce53a2..0c846552cad 100644 --- a/package/MDAnalysis/core/topology.py +++ b/package/MDAnalysis/core/topology.py @@ -315,7 +315,7 @@ class Topology(object): """ def __init__(self, n_atoms=1, n_res=1, n_seg=1, - attrs=None, + attrs=[], atom_resindex=None, residue_segindex=None): self.n_atoms = n_atoms diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index b3288271cac..801c05bfc1c 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1,6 +1,7 @@ import numpy as np from numpy.lib.utils import deprecate import logging +import itertools import MDAnalysis from ..lib import util @@ -496,7 +497,7 @@ def as_Universe(*args, **kwargs): asUniverse = deprecate(as_Universe, old_name='asUniverse', new_name='as_Universe') -#TODO: UPDATE ME WITH NEW TOPOLOGY DETAILS + def Merge(*args): """Return a :class:`Universe` from two or more :class:`AtomGroup` instances. @@ -506,16 +507,23 @@ def Merge(*args): It can also be used with a single :class:`AtomGroup` if the user wants to, for example, re-order the atoms in the Universe. - :Arguments: One or more :class:`AtomGroup` instances. + If multiple :class:`AtomGroup` instances from the same Universe are given, + the merge will first simply "add" together the :class:`AtomGroup` instances. 
- :Returns: an instance of :class:`~MDAnalaysis.AtomGroup.Universe` + Parameters + ---------- + args : One or more :class:`AtomGroup` instances. + + Returns + ------- + universe : An instance of :class:`~MDAnalaysis.AtomGroup.Universe` :Raises: :exc:`ValueError` for too few arguments or if an AtomGroup is empty and :exc:`TypeError` if arguments are not :class:`AtomGroup` instances. - .. rubric:: Example - + Example + ------- In this example, protein, ligand, and solvent were externally prepared in three different PDB files. They are loaded into separate :class:`Universe` objects (where they could be further manipulated, e.g. renumbered, @@ -530,71 +538,152 @@ def Merge(*args): The complete system is then written out to a new PDB file. - .. Note:: Merging does not create a full trajectory but only a single - structure even if the input consists of one or more trajectories. + Note + ---- + Merging does not create a full trajectory but only a single + structure even if the input consists of one or more trajectories. .. versionchanged 0.9.0:: Raises exceptions instead of assertion errors. 
""" - import MDAnalysis.topology.core if len(args) == 0: raise ValueError("Need at least one AtomGroup for merging") for a in args: - if not isinstance(a, AtomGroup): + if not isinstance(a, groups.AtomGroup): raise TypeError(repr(a) + " is not an AtomGroup") for a in args: if len(a) == 0: raise ValueError("cannot merge empty AtomGroup") - coords = np.vstack([a.coordinates() for a in args]) + # If any atom groups come from the same Universe, just add them + # together first + disjoint_atom_groups = [] + already_added = [] + for a, b in itertools.combinations(args, r=2): + if a in already_added and b in already_added: + continue + if a.universe is b.universe: + disjoint_atom_groups.append(a + b) + already_added.extend([a, b]) + else: + if a not in already_added: + disjoint_atom_groups.append(a) + already_added.append(a) + if b not in already_added: + disjoint_atom_groups.append(b) + already_added.append(b) + + u = Universe() + # Create a new topology using the intersection of topology attributes + blank_topology_attrs = set(dir(Topology(attrs=[]))) + common_attrs = set.intersection(*[set(dir(ag.universe._topology)) + for ag in disjoint_atom_groups]) + topology_groups = set(['bonds', 'angles', 'dihedrals', 'impropers']) + + # Create set of array-valued attributes which can be simply + # concatenated together + keep_attrs = common_attrs - blank_topology_attrs - topology_groups + + attrs = [] + dtypes = {} + for attrname in keep_attrs: + for ag in disjoint_atom_groups: + attr = getattr(ag, attrname) + type_attr = type(getattr(ag.universe._topology, attrname)) + if type(attr) != np.ndarray: + raise TypeError('Encountered unexpected topology'+ + 'attribute of type {}'.format( + type(attr))) + try: + attr_array.extend(attr) + except NameError: + attr_array = list(attr) + attrs.append(type_attr(np.array(attr_array, + dtype=attr.dtype))) + del attr_array + + # Build up topology groups + for tg in (topology_groups & common_attrs): + bondidx = [] + types = [] + offset = 0 + 
for ag in disjoint_atom_groups: + bonds = getattr(ag, tg) + bond_class = type(getattr(ag.universe._topology, tg)) + bondidx.extend(bonds.indices + offset) + if hasattr(bonds, '_bondtypes'): + types.extend(bonds.types()) + else: + types.extend([None]*len(bonds)) + offset += len(ag) + bondidx = np.array(bondidx, dtype=np.int32) + if any([t is None for t in types]): + attrs.append(bond_class(values)) + else: + types = np.array(types, dtype='|S8') + attrs.append(bond_class(bondidx, types)) + + # Renumber residue and segment indices + n_atoms = sum([len(ag) for ag in disjoint_atom_groups]) + residx = [] + segidx = [] + for ag in disjoint_atom_groups: + res_offset = len(set(residx)) + resdict = {n: i+res_offset for i, n in enumerate(set(ag.resindices))} + seg_offset = len(set(segidx)) + segdict = {n: i+len(set(segidx)) for i, n in enumerate(set(ag.segindices))} + residx.extend([resdict[n] for n in ag.resindices]) + segidx.extend([segdict[n] for n in ag.segindices]) + + residx = np.array(residx, dtype=np.int32) + segidx = np.array(segidx, dtype=np.int32) + + n_residues = len(set(residx)) + n_segments = len(set(segidx)) + + top = Topology(n_atoms, n_residues, n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx) + + # Put topology in Universe + u._topology = top + + # generate Universe version of each class + # AG, RG, SG, A, R, S + u._classes = groups.make_classes() + + # Put Group level stuff from topology into class + for attr in u._topology.attrs: + u._process_attr(attr) + + # Generate atoms, residues and segments + u.atoms = u._classes['atomgroup']( + np.arange(u._topology.n_atoms), u) + + u.residues = u._classes['residuegroup']( + np.arange( u._topology.n_residues), u) + + u.segments = u._classes['segmentgroup'](np.arange( + u._topology.n_segments), u) + + # Update Universe namespace with segids + for seg in u.segments: + if hasattr(seg, 'segid'): + if seg.segid[0].isdigit(): + name = 's' + seg.segid + else: + name = seg.segid + 
u.__dict__[name] = seg + + coords = np.vstack([a.positions for a in disjoint_atom_groups]) trajectory = MDAnalysis.coordinates.base.Reader(None) ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords) setattr(trajectory, "ts", ts) trajectory.n_frames = 1 - - # create an empty Universe object - u = Universe() u.trajectory = trajectory - # create a list of Atoms, then convert it to an AtomGroup - atoms = [copy.copy(a) for gr in args for a in gr] - for a in atoms: - a.universe = u - - # adjust the atom numbering - for i, a in enumerate(atoms): - a.index = i - a.serial = i + 1 - u.atoms = AtomGroup(atoms) - - # move over the topology - offset = 0 - tops = ['bonds', 'angles', 'dihedrals', 'impropers'] - idx_lists = {t:[] for t in tops} - for ag in args: - # create a mapping scheme for this atomgroup - mapping = {a.index:i for i, a in enumerate(ag, start=offset)} - offset += len(ag) - - for t in tops: - tg = getattr(ag, t) - # Create a topology group of only bonds that are within this ag - # ie we don't want bonds that extend out of the atomgroup - tg = tg.atomgroup_intersection(ag, strict=True) - - # Map them so they refer to our new indices - new_idx = [tuple(map(lambda x:mapping[x], entry)) - for entry in tg.to_indices()] - idx_lists[t].extend(new_idx) - - for t in tops: - u._topology[t] = idx_lists[t] - - # adjust the residue and segment numbering (removes any remaining references to the old universe) - MDAnalysis.topology.core.build_residues(u.atoms) - MDAnalysis.topology.core.build_segments(u.atoms) - return u From f34db5a441321fe919126dc2a81d37031e0b9ed3 Mon Sep 17 00:00:00 2001 From: khuston Date: Fri, 18 Mar 2016 20:50:01 -0400 Subject: [PATCH 2/4] fix atomgrps for same universe, enable sum My first attempt at collecting atom groups for the same universe was flawed if args contained more than one atom group from the same universe. Hopefully this one is correct. I also override __radd__ on AtomGroups to allow use of sum. 
--- package/MDAnalysis/core/groups.py | 22 +++++++++++++++++ package/MDAnalysis/core/topology.py | 4 ++- package/MDAnalysis/core/universe.py | 38 +++++++++++++---------------- 3 files changed, 42 insertions(+), 22 deletions(-) diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py index a13ccc60c81..c015d46b413 100644 --- a/package/MDAnalysis/core/groups.py +++ b/package/MDAnalysis/core/groups.py @@ -136,6 +136,28 @@ def __add__(self, other): return self.__class__(np.concatenate([self._ix, o_ix]), self._u) + def __radd__(self, other): + """Using built-in sum requires supporting 0 + self. If other is + anything other 0, an exception will be raised. + + Parameters + ---------- + other : int + Other should be 0, or else an exception will be raised. + + Returns + ------- + self + Group with elements of `self` reproduced + + """ + if other == 0: + return self.__class__(self._ix, self._u) + else: + raise TypeError("unsupported operand type(s) for +:"+ + " '{}' and '{}'".format(type(self).__name__, + type(other).__name__)) + def __contains__(self, other): if not other.level == self.level: # maybe raise TypeError instead? 
diff --git a/package/MDAnalysis/core/topology.py b/package/MDAnalysis/core/topology.py index 0c846552cad..fc5c398f405 100644 --- a/package/MDAnalysis/core/topology.py +++ b/package/MDAnalysis/core/topology.py @@ -315,12 +315,14 @@ class Topology(object): """ def __init__(self, n_atoms=1, n_res=1, n_seg=1, - attrs=[], + attrs=None, atom_resindex=None, residue_segindex=None): self.n_atoms = n_atoms self.n_residues = n_res self.n_segments = n_seg + if attrs is None: + attrs = [] self.tt = TransTable(n_atoms, n_res, n_seg, atom_resindex=atom_resindex, residue_segindex=residue_segindex) diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 801c05bfc1c..3a6d65965f0 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -560,30 +560,24 @@ def Merge(*args): # If any atom groups come from the same Universe, just add them # together first + ag_dict = {} + for a in args: + try: + ag_dict[a.universe].append(a) + except KeyError: + ag_dict[a.universe] = [a] + disjoint_atom_groups = [] - already_added = [] - for a, b in itertools.combinations(args, r=2): - if a in already_added and b in already_added: - continue - if a.universe is b.universe: - disjoint_atom_groups.append(a + b) - already_added.extend([a, b]) - else: - if a not in already_added: - disjoint_atom_groups.append(a) - already_added.append(a) - if b not in already_added: - disjoint_atom_groups.append(b) - already_added.append(b) + for ag in ag_dict.values(): + disjoint_atom_groups.append(sum(ag)) - u = Universe() # Create a new topology using the intersection of topology attributes blank_topology_attrs = set(dir(Topology(attrs=[]))) common_attrs = set.intersection(*[set(dir(ag.universe._topology)) for ag in disjoint_atom_groups]) topology_groups = set(['bonds', 'angles', 'dihedrals', 'impropers']) - # Create set of array-valued attributes which can be simply + # Create set of attributes which are array-valued and can be simply # concatenated 
together keep_attrs = common_attrs - blank_topology_attrs - topology_groups @@ -592,7 +586,7 @@ def Merge(*args): for attrname in keep_attrs: for ag in disjoint_atom_groups: attr = getattr(ag, attrname) - type_attr = type(getattr(ag.universe._topology, attrname)) + attr_class = type(getattr(ag.universe._topology, attrname)) if type(attr) != np.ndarray: raise TypeError('Encountered unexpected topology'+ 'attribute of type {}'.format( @@ -601,7 +595,7 @@ def Merge(*args): attr_array.extend(attr) except NameError: attr_array = list(attr) - attrs.append(type_attr(np.array(attr_array, + attrs.append(attr_class(np.array(attr_array, dtype=attr.dtype))) del attr_array @@ -620,7 +614,7 @@ def Merge(*args): types.extend([None]*len(bonds)) offset += len(ag) bondidx = np.array(bondidx, dtype=np.int32) - if any([t is None for t in types]): + if any(t is None for t in types): attrs.append(bond_class(values)) else: types = np.array(types, dtype='|S8') @@ -634,7 +628,8 @@ def Merge(*args): res_offset = len(set(residx)) resdict = {n: i+res_offset for i, n in enumerate(set(ag.resindices))} seg_offset = len(set(segidx)) - segdict = {n: i+len(set(segidx)) for i, n in enumerate(set(ag.segindices))} + segdict = {n: i+len(set(segidx)) for i, n in enumerate(set( + ag.segindices))} residx.extend([resdict[n] for n in ag.resindices]) segidx.extend([segdict[n] for n in ag.segindices]) @@ -649,7 +644,8 @@ def Merge(*args): atom_resindex=residx, residue_segindex=segidx) - # Put topology in Universe + # Create blank Universe and put topology in it + u = Universe() u._topology = top # generate Universe version of each class From 44f39f31bbb180bcdf8d48d9d1860d7c48e22f78 Mon Sep 17 00:00:00 2001 From: khuston Date: Fri, 18 Mar 2016 22:53:02 -0400 Subject: [PATCH 3/4] cleanup and describe which topologyattr get Merged --- package/MDAnalysis/core/universe.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/package/MDAnalysis/core/universe.py 
b/package/MDAnalysis/core/universe.py index 3a6d65965f0..0a113325deb 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1,7 +1,7 @@ import numpy as np from numpy.lib.utils import deprecate import logging -import itertools +from itertools import groupby import MDAnalysis from ..lib import util @@ -501,6 +501,9 @@ def as_Universe(*args, **kwargs): def Merge(*args): """Return a :class:`Universe` from two or more :class:`AtomGroup` instances. + The resulting universe will only inherit the common topology attributes that + all merged universes share. + :class:`AtomGroup` instances can come from different Universes, or come directly from a :meth:`~Universe.select_atoms` call. @@ -560,16 +563,9 @@ def Merge(*args): # If any atom groups come from the same Universe, just add them # together first - ag_dict = {} - for a in args: - try: - ag_dict[a.universe].append(a) - except KeyError: - ag_dict[a.universe] = [a] - - disjoint_atom_groups = [] - for ag in ag_dict.values(): - disjoint_atom_groups.append(sum(ag)) + sorted_ag = sorted([a for a in args], key=lambda a: a.universe) + disjoint_atom_groups = [sum(list(ag)) for u, ag in groupby(sorted_ag, + lambda a: a.universe)] # Create a new topology using the intersection of topology attributes blank_topology_attrs = set(dir(Topology(attrs=[]))) @@ -582,7 +578,6 @@ def Merge(*args): keep_attrs = common_attrs - blank_topology_attrs - topology_groups attrs = [] - dtypes = {} for attrname in keep_attrs: for ag in disjoint_atom_groups: attr = getattr(ag, attrname) From c8988523f7fae787db358469574912a15e954f77 Mon Sep 17 00:00:00 2001 From: khuston Date: Mon, 21 Mar 2016 22:05:08 -0400 Subject: [PATCH 4/4] correct Merge atom order, segind, add top tests I changed my previously proposed mda.Merge to align more closely with the old Merge behavior, in that it preserves the atom order as given to Merge. 
Given the introduction of residue and segment indices (distinct from ids), the new Merge assigns separate residue and segment indices to each argument of Merge. That is, if two arguments to Merge are atomgroups from the same universe with the same residue indices, in the post-merge universe, they will have different residue indices. If the user doesn't want this, s/he should add together the atom groups before merging. My understanding is that resid is just an attribute of a residue, whereas the residue index is the unique identifier of that residue instance. I didn't think it would make much sense for Merge to duplicate atoms by giving them different indices while keeping their residue indices the same, such that you have doubled or tripled (etc.) the number of atoms in a residue or segment. Previously, I was sending the per-atom segment indices to Topology instead of the per-residue segment indices. Topology.__init__() or TransTable.__init__() should probably check that index arrays of the correct length are passed in. I added an attribute to the TopologyAttrs called `per_object` where the value of this attribute is 'atom', 'residue', or 'segment' if the length of the TopologyAttr value should equal n_atoms, n_residues, or n_segments, respectively. The Universe method `_process_attr` now checks if the TopologyAttr has a `per_object` attribute, and raises a ValueError if the length of the value array is not as expected. Finally, I updated `test_modelling.py`. This actually increased the number of errors raised during testing by 2, because instead of stopping at an error in the first few lines of the test file, it goes on and errors occur for Capping and `atoms.write`. 
--- package/MDAnalysis/core/topology.py | 2 +- package/MDAnalysis/core/topologyattrs.py | 14 ++ package/MDAnalysis/core/topologyobjects.py | 4 + package/MDAnalysis/core/universe.py | 161 +++++++++++--------- testsuite/MDAnalysisTests/test_modelling.py | 62 +++++--- 5 files changed, 154 insertions(+), 89 deletions(-) diff --git a/package/MDAnalysis/core/topology.py b/package/MDAnalysis/core/topology.py index fc5c398f405..bb4902f6158 100644 --- a/package/MDAnalysis/core/topology.py +++ b/package/MDAnalysis/core/topology.py @@ -36,7 +36,7 @@ class TransTable(object): array must be <= `n_residues`, and the array must be length `n_atoms`; giving None defaults to placing all atoms in residue 0 residue_segindex : 1-D array - segindex for each atom in the topology; the number of unique values in this + segindex for each residue in the topology; the number of unique values in this array must be <= `n_segments`, and the array must be length `n_residues`; giving None defaults to placing all residues in segment 0 diff --git a/package/MDAnalysis/core/topologyattrs.py b/package/MDAnalysis/core/topologyattrs.py index afa9a0d527d..587b17009d6 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -265,6 +265,7 @@ class Atomids(AtomAttr): """ attrname = 'ids' singular = 'id' + per_object = 'atom' #TODO: update docs to property doc @@ -273,6 +274,7 @@ class Atomnames(AtomAttr): """ attrname = 'names' singular = 'name' + per_object = 'atom' transplants = defaultdict(list) def getattr__(atomgroup, name): @@ -321,6 +323,7 @@ class Atomtypes(AtomAttr): """Type for each atom""" attrname = 'types' singular = 'type' + per_object = 'atom' #TODO: update docs to property doc @@ -328,6 +331,7 @@ class Radii(AtomAttr): """Radii for each atom""" attrname = 'radii' singular = 'radius' + per_object = 'atom' class ChainIDs(AtomAttr): @@ -339,24 +343,28 @@ class ChainIDs(AtomAttr): """ attrname = 'chainIDs' singular = 'chainID' + per_object = 'atom' 
class ICodes(AtomAttr): """Insertion code for Atoms""" attrname = 'icodes' singular = 'icode' + per_object = 'atom' class Tempfactors(AtomAttr): """Tempfactor for atoms""" attrname = 'tempfactors' singular = 'tempfactor' + per_object = 'atom' #TODO: need to add cacheing class Masses(AtomAttr): attrname = 'masses' singular = 'mass' + per_object = 'atom' target_levels = ['atom', 'residue', 'segment'] transplants = defaultdict(list) @@ -641,6 +649,7 @@ def principal_axes(atomgroup, **kwargs): class Charges(AtomAttr): attrname = 'charges' singular = 'charge' + per_object = 'atom' target_levels = ['atom', 'residue', 'segment'] transplants = defaultdict(list) @@ -679,12 +688,14 @@ class Bfactors(AtomAttr): """Crystallographic B-factors in A**2 for each atom""" attrname = 'bfactors' singular = 'bfactor' + per_object = 'atom' #TODO: update docs to property doc class Occupancies(AtomAttr): attrname = 'occupancies' singular = 'occupancy' + per_object = 'atom' #TODO: update docs to property doc @@ -692,6 +703,7 @@ class AltLocs(AtomAttr): """AltLocs for each atom""" attrname = 'altLocs' singular = 'altLoc' + per_object = 'atom' ## residue attributes @@ -708,6 +720,7 @@ class ResidueAttr(TopologyAttr): attrname = 'residueattrs' singular = 'residueattr' target_levels = ['residue'] + per_object = 'residue' def get_atoms(self, ag): rix = self.top.tt.atoms2residues(ag._ix) @@ -802,6 +815,7 @@ class SegmentAttr(TopologyAttr): attrname = 'segmentattrs' singular = 'segmentattr' target_levels = ['segment'] + per_object = 'segment' def get_atoms(self, ag): six = self.top.tt.atoms2segments(ag._ix) diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py index 105824acead..8fced8ace8a 100644 --- a/package/MDAnalysis/core/topologyobjects.py +++ b/package/MDAnalysis/core/topologyobjects.py @@ -577,6 +577,10 @@ def atomgroup_intersection(self, ag, **kwargs): .. 
versionadded:: 0.9.0 """ + # Issue #780 - if self is empty, return self to avoid invalid mask + if not self: + return self + # Strict requires all items in a row to be seen, # otherwise any item in a row func = np.all if kwargs.get('strict', False) else np.any diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index 0a113325deb..ee127a18a84 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -8,6 +8,7 @@ from ..lib.util import cached from . import groups from .topology import Topology +from .topologyattrs import AtomAttr, ResidueAttr, SegmentAttr logger = logging.getLogger("MDAnalysis.core.universe") @@ -181,6 +182,13 @@ def __init__(self, *args, **kwargs): " with parser {1} \n" "Error: {2}".format(self.filename, parser, err)) + # generate and populate Universe version of each class + self._generate_from_topology() + + # Load coordinates + self.load_new(coordinatefile, **kwargs) + + def _generate_from_topology(self): # generate Universe version of each class # AG, RG, SG, A, R, S self._classes = groups.make_classes() @@ -208,9 +216,6 @@ def __init__(self, *args, **kwargs): name = seg.segid self.__dict__[name] = seg - # Load coordinates - self.load_new(coordinatefile, **kwargs) - @property def universe(self): # for Writer.write(universe), see Issue 49 @@ -384,6 +389,19 @@ def _process_attr(self, attr): - Component properties - Transplant methods """ + n_dict = {'atom': self._topology.n_atoms, + 'residue': self._topology.n_residues, + 'segment': self._topology.n_segments} + if hasattr(attr, 'per_object') and \ + len(attr) != n_dict[attr.per_object]: + raise ValueError('Length of {attr} does not' + ' match number of {obj}s.\n' + 'Expect: {n:d} Have: {m:d}'.format( + attr=attr.attrname, + obj=attr.per_object, + n=n_dict[attr.per_object], + m=len(attr))) + self._classes['group']._add_prop(attr) for level in attr.target_levels: @@ -550,6 +568,7 @@ def Merge(*args): Raises exceptions instead of 
assertion errors. """ + from ..topology.base import squash_by if len(args) == 0: raise ValueError("Need at least one AtomGroup for merging") @@ -561,76 +580,99 @@ def Merge(*args): if len(a) == 0: raise ValueError("cannot merge empty AtomGroup") - # If any atom groups come from the same Universe, just add them - # together first - sorted_ag = sorted([a for a in args], key=lambda a: a.universe) - disjoint_atom_groups = [sum(list(ag)) for u, ag in groupby(sorted_ag, - lambda a: a.universe)] - # Create a new topology using the intersection of topology attributes blank_topology_attrs = set(dir(Topology(attrs=[]))) common_attrs = set.intersection(*[set(dir(ag.universe._topology)) - for ag in disjoint_atom_groups]) - topology_groups = set(['bonds', 'angles', 'dihedrals', 'impropers']) + for ag in args]) + tops = set(['bonds', 'angles', 'dihedrals', 'impropers']) + + attrs = [] # Create set of attributes which are array-valued and can be simply # concatenated together - keep_attrs = common_attrs - blank_topology_attrs - topology_groups - - attrs = [] - for attrname in keep_attrs: - for ag in disjoint_atom_groups: + common_array_attrs = common_attrs - blank_topology_attrs - tops + # Build up array-valued topology attributes including only attributes + # that all arguments' universes have + for attrname in common_array_attrs: + for ag in args: attr = getattr(ag, attrname) attr_class = type(getattr(ag.universe._topology, attrname)) + if issubclass(attr_class, AtomAttr): + pass + elif issubclass(attr_class, ResidueAttr): + attr = getattr(ag.residues, attrname) + elif issubclass(attr_class, SegmentAttr): + attr = getattr(ag.segments, attrname) + else: + raise NotImplementedError("Don't know how to handle" + " TopologyAttr not subclassed" + " from AtomAttr, ResidueAttr," + " or SegmentAttr.") if type(attr) != np.ndarray: - raise TypeError('Encountered unexpected topology'+ - 'attribute of type {}'.format( - type(attr))) + raise TypeError('Encountered unexpected topology ' + 
'attribute of type {}'.format(type(attr))) try: attr_array.extend(attr) except NameError: attr_array = list(attr) - attrs.append(attr_class(np.array(attr_array, - dtype=attr.dtype))) + attrs.append(attr_class(np.array(attr_array, dtype=attr.dtype))) del attr_array - # Build up topology groups - for tg in (topology_groups & common_attrs): + # Build up topology groups including only those that all arguments' + # universes have + for t in (tops & common_attrs): + offset = 0 bondidx = [] types = [] - offset = 0 - for ag in disjoint_atom_groups: - bonds = getattr(ag, tg) - bond_class = type(getattr(ag.universe._topology, tg)) - bondidx.extend(bonds.indices + offset) - if hasattr(bonds, '_bondtypes'): - types.extend(bonds.types()) - else: - types.extend([None]*len(bonds)) + for ag in args: + # create a mapping scheme for this atomgroup + mapping = {a.index:i for i, a in enumerate(ag, start=offset)} offset += len(ag) - bondidx = np.array(bondidx, dtype=np.int32) + + tg = getattr(ag, t) + bonds_class = type(getattr(ag.universe._topology, t)) + # Create a topology group of only bonds that are within this ag + # ie we don't want bonds that extend out of the atomgroup + tg = tg.atomgroup_intersection(ag, strict=True) + + # Map them so they refer to our new indices + new_idx = [tuple(map(lambda x:mapping[x], entry)) + for entry in tg.indices] + bondidx.extend(new_idx) + if hasattr(tg, '_bondtypes'): + types.extend(tg._bondtypes) + else: + types.extend([None]*len(tg)) if any(t is None for t in types): - attrs.append(bond_class(values)) + attrs.append(bonds_class(bondidx)) else: types = np.array(types, dtype='|S8') - attrs.append(bond_class(bondidx, types)) + attrs.append(bonds_class(bondidx, types)) # Renumber residue and segment indices - n_atoms = sum([len(ag) for ag in disjoint_atom_groups]) + n_atoms = sum([len(ag) for ag in args]) residx = [] segidx = [] - for ag in disjoint_atom_groups: - res_offset = len(set(residx)) - resdict = {n: i+res_offset for i, n in 
enumerate(set(ag.resindices))} - seg_offset = len(set(segidx)) - segdict = {n: i+len(set(segidx)) for i, n in enumerate(set( - ag.segindices))} - residx.extend([resdict[n] for n in ag.resindices]) - segidx.extend([segdict[n] for n in ag.segindices]) + res_offset = 0 + seg_offset = 0 + for ag in args: + # create a mapping scheme for this atomgroup's parents + res_mapping = {r.resindex: i for i, r in enumerate(ag.residues, + start=res_offset)} + seg_mapping = {r.segindex: i for i, r in enumerate(ag.segments, + start=seg_offset)} + res_offset += len(ag.residues) + seg_offset += len(ag.segments) + + # Map them so they refer to our new indices + residx.extend(map(lambda x:res_mapping[x], ag.resindices)) + segidx.extend(map(lambda x:seg_mapping[x], ag.segindices)) residx = np.array(residx, dtype=np.int32) segidx = np.array(segidx, dtype=np.int32) + _, _, [segidx] = squash_by(residx, segidx) + n_residues = len(set(residx)) n_segments = len(set(segidx)) @@ -643,34 +685,11 @@ def Merge(*args): u = Universe() u._topology = top - # generate Universe version of each class - # AG, RG, SG, A, R, S - u._classes = groups.make_classes() - - # Put Group level stuff from topology into class - for attr in u._topology.attrs: - u._process_attr(attr) - - # Generate atoms, residues and segments - u.atoms = u._classes['atomgroup']( - np.arange(u._topology.n_atoms), u) - - u.residues = u._classes['residuegroup']( - np.arange( u._topology.n_residues), u) - - u.segments = u._classes['segmentgroup'](np.arange( - u._topology.n_segments), u) - - # Update Universe namespace with segids - for seg in u.segments: - if hasattr(seg, 'segid'): - if seg.segid[0].isdigit(): - name = 's' + seg.segid - else: - name = seg.segid - u.__dict__[name] = seg + # Generate universe and populate namespace + u._generate_from_topology() - coords = np.vstack([a.positions for a in disjoint_atom_groups]) + # Take one frame of coordinates from combined atomgroups + coords = np.vstack([a.positions for a in args]) 
trajectory = MDAnalysis.coordinates.base.Reader(None) ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords) setattr(trajectory, "ts", ts) diff --git a/testsuite/MDAnalysisTests/test_modelling.py b/testsuite/MDAnalysisTests/test_modelling.py index 9f5ab3385f9..d70232ee0a7 100644 --- a/testsuite/MDAnalysisTests/test_modelling.py +++ b/testsuite/MDAnalysisTests/test_modelling.py @@ -19,8 +19,8 @@ TRZ, TRZ_psf, \ capping_input, capping_output, capping_ace, capping_nma, \ merge_protein, merge_ligand, merge_water -import MDAnalysis.core.AtomGroup -from MDAnalysis.core.AtomGroup import Atom, AtomGroup +import MDAnalysis.core.groups +from MDAnalysis.core.groups import make_classes from MDAnalysis import NoDataError import numpy as np @@ -34,6 +34,8 @@ from MDAnalysis import Universe, Merge from MDAnalysis.analysis.align import alignto +AtomGroup = make_classes()['atomgroup'] + def capping(ref, ace, nma, output): resids = ref.select_atoms("all").resids @@ -124,7 +126,6 @@ def test_capping_inmemory(self): assert_array_equal(peptide.trajectory.ts.dimensions, u.trajectory.ts.dimensions) - class TestMerge(TestCase): ext = "pdb" @@ -154,6 +155,12 @@ def test_merge(self): # Check that the output Universe has the same number of atoms as the # starting AtomGroups assert_equal(len(u0.atoms), (len(u1.atoms) + len(u2.atoms) + len(u3.atoms))) + # Check that the output Universe has the same number of residues and + # segments as the starting AtomGroups + assert_equal(len(u0.residues), (len(u1.residues) + len(u2.residues) + + len(u3.residues))) + assert_equal(len(u0.segments), (len(u1.segments) + len(u2.segments) + + len(u3.segments))) # Make sure that all the atoms in the new universe are assigned to only # one, new Universe @@ -177,6 +184,14 @@ def test_merge(self): ids_new2 = [a.index for a in u.atoms] assert_equal(ids_new, ids_new2) + def test_merge_same_universe(self): + u1, _, _ = self.universes + + u0 = MDAnalysis.Merge(u1.atoms, u1.atoms, u1.atoms) + 
assert_equal(len(u0.atoms), 3*len(u1.atoms)) + assert_equal(len(u0.residues), 3*len(u1.residues)) + assert_equal(len(u0.segments), 3*len(u1.segments)) + def test_residue_references(self): u1, u2, u3 = self.universes m = Merge(u1.atoms, u2.atoms) @@ -196,8 +211,8 @@ def test_nonsense_TypeError(self): assert_raises(TypeError, Merge, ['1', 2]) def test_emptyAG_ValueError(self): - a = AtomGroup([]) - b = AtomGroup([]) + a = AtomGroup([], None) + b = AtomGroup([], None) assert_raises(ValueError, Merge, a, b) @@ -205,9 +220,11 @@ class TestMergeTopology(object): """Test that Merge correct does topology""" def setUp(self): self.u = MDAnalysis.Universe(PSF, DCD) + self.u2 = MDAnalysis.Universe(merge_protein) def tearDown(self): del self.u + del self.u2 def test_merge_with_topology(self): ag1 = self.u.atoms[:20] @@ -216,25 +233,36 @@ def test_merge_with_topology(self): u2 = MDAnalysis.Merge(ag1, ag2) assert_(len(u2.atoms) == 30) - assert_(len(u2.bonds) == 28) - assert_(len(u2.angles) == 47) - assert_(len(u2.dihedrals) == 53) - assert_(len(u2.impropers) == 1) + assert_(len(u2.atoms.bonds) == 28) + assert_(len(u2.atoms.angles) == 47) + assert_(len(u2.atoms.dihedrals) == 53) + assert_(len(u2.atoms.impropers) == 1) # All these bonds are in the merged Universe assert_(len(ag1[0].bonds) == len(u2.atoms[0].bonds)) # One of these bonds isn't in the merged Universe assert_(len(ag2[0].bonds) -1 == len(u2.atoms[20].bonds)) + def test_merge_with_topology_from_different_universes(self): + u3 = MDAnalysis.Merge(self.u.atoms[:110], self.u2.atoms) + + # merge_protein doesn't contain bond topology, so merged universe + # shouldn't have one either + print(u3.atoms.bonds) + # PDB reader yields empty Bonds group, which means bonds from + # PSF/DCD survive the merge + #assert_(not hasattr(u3.atoms, 'bonds') or len(u3.atoms.bonds) == 0) + assert_(not hasattr(u3.atoms, 'angles') or len(u3.atoms.bonds) == 0) + assert_(not hasattr(u3.atoms, 'dihedrals') or len(u3.atoms.bonds) == 0) + assert_(not 
hasattr(u3.atoms, 'impropers') or len(u3.atoms.bonds) == 0) + def test_merge_without_topology(self): # This shouldn't have topology as we merged single atoms - ag1 = AtomGroup([self.u.atoms[1]]) - ag2 = AtomGroup([self.u.atoms[10]]) - - u2 = MDAnalysis.Merge(ag1, ag2) + u2 = MDAnalysis.Merge(self.u.atoms[0:1], self.u.atoms[10:11]) assert_(len(u2.atoms) == 2) - assert_(len(u2.bonds) == 0) - assert_(len(u2.angles) == 0) - assert_(len(u2.dihedrals) == 0) - assert_(len(u2.impropers) == 0) + assert_(len(u2.atoms.bonds) == 0) + assert_(len(u2.atoms.angles) == 0) + assert_(len(u2.atoms.dihedrals) == 0) + assert_(len(u2.atoms.impropers) == 0) +