diff --git a/package/MDAnalysis/core/groups.py b/package/MDAnalysis/core/groups.py
index a13ccc60c81..c015d46b413 100644
--- a/package/MDAnalysis/core/groups.py
+++ b/package/MDAnalysis/core/groups.py
@@ -136,6 +136,28 @@ def __add__(self, other):
 
         return self.__class__(np.concatenate([self._ix, o_ix]), self._u)
 
+    def __radd__(self, other):
+        """Support ``0 + self`` so that the built-in ``sum`` works on groups.
+
+        If `other` is anything other than 0, a TypeError is raised.
+
+        Parameters
+        ----------
+        other : int
+            Must be 0, or else a TypeError is raised.
+
+        Returns
+        -------
+        group
+            New group of the same class with the elements of `self`
+
+        """
+        if other == 0:
+            return self.__class__(self._ix, self._u)
+        raise TypeError(
+            "unsupported operand type(s) for +: '{}' and '{}'".format(
+                type(other).__name__, type(self).__name__))
+
     def __contains__(self, other):
         if not other.level == self.level:
             # maybe raise TypeError instead?
diff --git a/package/MDAnalysis/core/topology.py b/package/MDAnalysis/core/topology.py
index ea2d0ce53a2..bb4902f6158 100644
--- a/package/MDAnalysis/core/topology.py
+++ b/package/MDAnalysis/core/topology.py
@@ -36,7 +36,7 @@ class TransTable(object):
         array must be <= `n_residues`, and the array must be length `n_atoms`;
         giving None defaults to placing all atoms in residue 0
     residue_segindex : 1-D array
-        segindex for each atom in the topology; the number of unique values in this
+        segindex for each residue in the topology; the number of unique values in this
         array must be <= `n_segments`, and the array must be length `n_residues`;
         giving None defaults to placing all residues in segment 0
 
@@ -321,6 +321,8 @@ def __init__(self, n_atoms=1, n_res=1, n_seg=1,
         self.n_atoms = n_atoms
         self.n_residues = n_res
         self.n_segments = n_seg
+        if attrs is None:
+            attrs = []
         self.tt = TransTable(n_atoms, n_res, n_seg,
                              atom_resindex=atom_resindex,
                              residue_segindex=residue_segindex)
diff --git a/package/MDAnalysis/core/topologyattrs.py
b/package/MDAnalysis/core/topologyattrs.py index afa9a0d527d..587b17009d6 100644 --- a/package/MDAnalysis/core/topologyattrs.py +++ b/package/MDAnalysis/core/topologyattrs.py @@ -265,6 +265,7 @@ class Atomids(AtomAttr): """ attrname = 'ids' singular = 'id' + per_object = 'atom' #TODO: update docs to property doc @@ -273,6 +274,7 @@ class Atomnames(AtomAttr): """ attrname = 'names' singular = 'name' + per_object = 'atom' transplants = defaultdict(list) def getattr__(atomgroup, name): @@ -321,6 +323,7 @@ class Atomtypes(AtomAttr): """Type for each atom""" attrname = 'types' singular = 'type' + per_object = 'atom' #TODO: update docs to property doc @@ -328,6 +331,7 @@ class Radii(AtomAttr): """Radii for each atom""" attrname = 'radii' singular = 'radius' + per_object = 'atom' class ChainIDs(AtomAttr): @@ -339,24 +343,28 @@ class ChainIDs(AtomAttr): """ attrname = 'chainIDs' singular = 'chainID' + per_object = 'atom' class ICodes(AtomAttr): """Insertion code for Atoms""" attrname = 'icodes' singular = 'icode' + per_object = 'atom' class Tempfactors(AtomAttr): """Tempfactor for atoms""" attrname = 'tempfactors' singular = 'tempfactor' + per_object = 'atom' #TODO: need to add cacheing class Masses(AtomAttr): attrname = 'masses' singular = 'mass' + per_object = 'atom' target_levels = ['atom', 'residue', 'segment'] transplants = defaultdict(list) @@ -641,6 +649,7 @@ def principal_axes(atomgroup, **kwargs): class Charges(AtomAttr): attrname = 'charges' singular = 'charge' + per_object = 'atom' target_levels = ['atom', 'residue', 'segment'] transplants = defaultdict(list) @@ -679,12 +688,14 @@ class Bfactors(AtomAttr): """Crystallographic B-factors in A**2 for each atom""" attrname = 'bfactors' singular = 'bfactor' + per_object = 'atom' #TODO: update docs to property doc class Occupancies(AtomAttr): attrname = 'occupancies' singular = 'occupancy' + per_object = 'atom' #TODO: update docs to property doc @@ -692,6 +703,7 @@ class AltLocs(AtomAttr): """AltLocs for each atom""" 
attrname = 'altLocs' singular = 'altLoc' + per_object = 'atom' ## residue attributes @@ -708,6 +720,7 @@ class ResidueAttr(TopologyAttr): attrname = 'residueattrs' singular = 'residueattr' target_levels = ['residue'] + per_object = 'residue' def get_atoms(self, ag): rix = self.top.tt.atoms2residues(ag._ix) @@ -802,6 +815,7 @@ class SegmentAttr(TopologyAttr): attrname = 'segmentattrs' singular = 'segmentattr' target_levels = ['segment'] + per_object = 'segment' def get_atoms(self, ag): six = self.top.tt.atoms2segments(ag._ix) diff --git a/package/MDAnalysis/core/topologyobjects.py b/package/MDAnalysis/core/topologyobjects.py index 105824acead..8fced8ace8a 100644 --- a/package/MDAnalysis/core/topologyobjects.py +++ b/package/MDAnalysis/core/topologyobjects.py @@ -577,6 +577,10 @@ def atomgroup_intersection(self, ag, **kwargs): .. versionadded:: 0.9.0 """ + # Issue #780 - if self is empty, return self to avoid invalid mask + if not self: + return self + # Strict requires all items in a row to be seen, # otherwise any item in a row func = np.all if kwargs.get('strict', False) else np.any diff --git a/package/MDAnalysis/core/universe.py b/package/MDAnalysis/core/universe.py index b3288271cac..ee127a18a84 100644 --- a/package/MDAnalysis/core/universe.py +++ b/package/MDAnalysis/core/universe.py @@ -1,12 +1,14 @@ import numpy as np from numpy.lib.utils import deprecate import logging +from itertools import groupby import MDAnalysis from ..lib import util from ..lib.util import cached from . 
import groups from .topology import Topology +from .topologyattrs import AtomAttr, ResidueAttr, SegmentAttr logger = logging.getLogger("MDAnalysis.core.universe") @@ -180,6 +182,13 @@ def __init__(self, *args, **kwargs): " with parser {1} \n" "Error: {2}".format(self.filename, parser, err)) + # generate and populate Universe version of each class + self._generate_from_topology() + + # Load coordinates + self.load_new(coordinatefile, **kwargs) + + def _generate_from_topology(self): # generate Universe version of each class # AG, RG, SG, A, R, S self._classes = groups.make_classes() @@ -207,9 +216,6 @@ def __init__(self, *args, **kwargs): name = seg.segid self.__dict__[name] = seg - # Load coordinates - self.load_new(coordinatefile, **kwargs) - @property def universe(self): # for Writer.write(universe), see Issue 49 @@ -383,6 +389,19 @@ def _process_attr(self, attr): - Component properties - Transplant methods """ + n_dict = {'atom': self._topology.n_atoms, + 'residue': self._topology.n_residues, + 'segment': self._topology.n_segments} + if hasattr(attr, 'per_object') and \ + len(attr) != n_dict[attr.per_object]: + raise ValueError('Length of {attr} does not' + ' match number of {obj}s.\n' + 'Expect: {n:d} Have: {m:d}'.format( + attr=attr.attrname, + obj=attr.per_object, + n=n_dict[attr.per_object], + m=len(attr))) + self._classes['group']._add_prop(attr) for level in attr.target_levels: @@ -496,26 +515,36 @@ def as_Universe(*args, **kwargs): asUniverse = deprecate(as_Universe, old_name='asUniverse', new_name='as_Universe') -#TODO: UPDATE ME WITH NEW TOPOLOGY DETAILS + def Merge(*args): """Return a :class:`Universe` from two or more :class:`AtomGroup` instances. + The resulting universe will only inherit the common topology attributes that + all merged universes share. + :class:`AtomGroup` instances can come from different Universes, or come directly from a :meth:`~Universe.select_atoms` call. 
It can also be used with a single :class:`AtomGroup` if the user wants to,
     for example, re-order the atoms in the Universe.
 
-    :Arguments: One or more :class:`AtomGroup` instances.
+    If multiple :class:`AtomGroup` instances from the same Universe are given,
+    the merge will first simply "add" together the :class:`AtomGroup` instances.
 
-    :Returns: an instance of :class:`~MDAnalaysis.AtomGroup.Universe`
+    Parameters
+    ----------
+    args : One or more :class:`AtomGroup` instances.
+
+    Returns
+    -------
+    universe : An instance of :class:`~MDAnalysis.AtomGroup.Universe`
 
     :Raises: :exc:`ValueError` for too few arguments or if an AtomGroup is
              empty and :exc:`TypeError` if arguments are not
              :class:`AtomGroup` instances.
 
-    .. rubric:: Example
-
+    Example
+    -------
     In this example, protein, ligand, and solvent were externally prepared in
     three different PDB files. They are loaded into separate :class:`Universe`
     objects (where they could be further manipulated, e.g. renumbered,
@@ -530,71 +559,141 @@ def Merge(*args):
 
     The complete system is then written out to a new PDB file.
 
-    .. Note:: Merging does not create a full trajectory but only a single
-              structure even if the input consists of one or more trajectories.
+    Note
+    ----
+    Merging does not create a full trajectory but only a single
+    structure even if the input consists of one or more trajectories.
 
     .. versionchanged 0.9.0::
        Raises exceptions instead of assertion errors.
 
     """
-    import MDAnalysis.topology.core
+    from ..topology.base import squash_by
 
     if len(args) == 0:
         raise ValueError("Need at least one AtomGroup for merging")
 
     for a in args:
-        if not isinstance(a, AtomGroup):
+        if not isinstance(a, groups.AtomGroup):
             raise TypeError(repr(a) + " is not an AtomGroup")
     for a in args:
         if len(a) == 0:
             raise ValueError("cannot merge empty AtomGroup")
 
-    coords = np.vstack([a.coordinates() for a in args])
-    trajectory = MDAnalysis.coordinates.base.Reader(None)
-    ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords)
-    setattr(trajectory, "ts", ts)
-    trajectory.n_frames = 1
-
-    # create an empty Universe object
-    u = Universe()
-    u.trajectory = trajectory
-
-    # create a list of Atoms, then convert it to an AtomGroup
-    atoms = [copy.copy(a) for gr in args for a in gr]
-    for a in atoms:
-        a.universe = u
-
-    # adjust the atom numbering
-    for i, a in enumerate(atoms):
-        a.index = i
-        a.serial = i + 1
-    u.atoms = AtomGroup(atoms)
-
-    # move over the topology
-    offset = 0
-    tops = ['bonds', 'angles', 'dihedrals', 'impropers']
-    idx_lists = {t:[] for t in tops}
-    for ag in args:
-        # create a mapping scheme for this atomgroup
-        mapping = {a.index:i for i, a in enumerate(ag, start=offset)}
-        offset += len(ag)
+    # Create a new topology using the intersection of topology attributes
+    blank_topology_attrs = set(dir(Topology(attrs=[])))
+    common_attrs = set.intersection(*[set(dir(ag.universe._topology))
+                                      for ag in args])
+    tops = set(['bonds', 'angles', 'dihedrals', 'impropers'])
+
+    attrs = []
+
+    # Create set of attributes which are array-valued and can be simply
+    # concatenated together
+    common_array_attrs = common_attrs - blank_topology_attrs - tops
+    # Build up array-valued topology attributes including only attributes
+    # that all arguments' universes have
+    for attrname in common_array_attrs:
+        # Values of this attribute collected from every AtomGroup, in order
+        attr_array = []
+        for ag in args:
+            attr = getattr(ag, attrname)
+            attr_class = type(getattr(ag.universe._topology, attrname))
+            # Residue- and segment-level attributes are read from the parent
+            # groups of the AtomGroup rather than from the atoms themselves
+            if issubclass(attr_class, AtomAttr):
+                pass
+            elif issubclass(attr_class, ResidueAttr):
+                attr = getattr(ag.residues, attrname)
+            elif issubclass(attr_class, SegmentAttr):
+                attr = getattr(ag.segments, attrname)
+            else:
+                raise NotImplementedError("Don't know how to handle"
+                                          " TopologyAttr not subclassed"
+                                          " from AtomAttr, ResidueAttr,"
+                                          " or SegmentAttr.")
+            if not isinstance(attr, np.ndarray):
+                raise TypeError('Encountered unexpected topology '
+                                'attribute of type {}'.format(type(attr)))
+            attr_array.extend(attr)
+        attrs.append(attr_class(np.array(attr_array, dtype=attr.dtype)))
+
+    # Build up topology groups including only those that all arguments'
+    # universes have
+    for t in (tops & common_attrs):
+        offset = 0
+        bondidx = []
+        types = []
+        for ag in args:
+            # create a mapping scheme for this atomgroup
+            mapping = {a.index:i for i, a in enumerate(ag, start=offset)}
+            offset += len(ag)
 
-        for t in tops:
             tg = getattr(ag, t)
+            bonds_class = type(getattr(ag.universe._topology, t))
             # Create a topology group of only bonds that are within this ag
             # ie we don't want bonds that extend out of the atomgroup
             tg = tg.atomgroup_intersection(ag, strict=True)
 
             # Map them so they refer to our new indices
             new_idx = [tuple(map(lambda x:mapping[x], entry))
-                       for entry in tg.to_indices()]
-            idx_lists[t].extend(new_idx)
+                       for entry in tg.indices]
+            bondidx.extend(new_idx)
+            if hasattr(tg, '_bondtypes'):
+                types.extend(tg._bondtypes)
+            else:
+                types.extend([None]*len(tg))
+        if any(bondtype is None for bondtype in types):
+            attrs.append(bonds_class(bondidx))
+        else:
+            types = np.array(types, dtype='|S8')
+            attrs.append(bonds_class(bondidx, types))
+
+    # Renumber residue and segment indices
+    n_atoms = sum(len(ag) for ag in args)
+    residx = []
+    segidx = []
+    res_offset = 0
+    seg_offset = 0
+    for ag in args:
+        # create a mapping scheme for this atomgroup's parents
+        res_mapping = {r.resindex: i for i, r in enumerate(ag.residues,
+                                                           start=res_offset)}
+
seg_mapping = {r.segindex: i for i, r in enumerate(ag.segments, + start=seg_offset)} + res_offset += len(ag.residues) + seg_offset += len(ag.segments) + + # Map them so they refer to our new indices + residx.extend(map(lambda x:res_mapping[x], ag.resindices)) + segidx.extend(map(lambda x:seg_mapping[x], ag.segindices)) + + residx = np.array(residx, dtype=np.int32) + segidx = np.array(segidx, dtype=np.int32) - for t in tops: - u._topology[t] = idx_lists[t] + _, _, [segidx] = squash_by(residx, segidx) - # adjust the residue and segment numbering (removes any remaining references to the old universe) - MDAnalysis.topology.core.build_residues(u.atoms) - MDAnalysis.topology.core.build_segments(u.atoms) + n_residues = len(set(residx)) + n_segments = len(set(segidx)) + + top = Topology(n_atoms, n_residues, n_segments, + attrs=attrs, + atom_resindex=residx, + residue_segindex=segidx) + + # Create blank Universe and put topology in it + u = Universe() + u._topology = top + + # Generate universe and populate namespace + u._generate_from_topology() + + # Take one frame of coordinates from combined atomgroups + coords = np.vstack([a.positions for a in args]) + trajectory = MDAnalysis.coordinates.base.Reader(None) + ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords) + setattr(trajectory, "ts", ts) + trajectory.n_frames = 1 + u.trajectory = trajectory return u diff --git a/testsuite/MDAnalysisTests/test_modelling.py b/testsuite/MDAnalysisTests/test_modelling.py index 9f5ab3385f9..d70232ee0a7 100644 --- a/testsuite/MDAnalysisTests/test_modelling.py +++ b/testsuite/MDAnalysisTests/test_modelling.py @@ -19,8 +19,8 @@ TRZ, TRZ_psf, \ capping_input, capping_output, capping_ace, capping_nma, \ merge_protein, merge_ligand, merge_water -import MDAnalysis.core.AtomGroup -from MDAnalysis.core.AtomGroup import Atom, AtomGroup +import MDAnalysis.core.groups +from MDAnalysis.core.groups import make_classes from MDAnalysis import NoDataError import numpy as np @@ -34,6 +34,8 
@@ from MDAnalysis import Universe, Merge from MDAnalysis.analysis.align import alignto +AtomGroup = make_classes()['atomgroup'] + def capping(ref, ace, nma, output): resids = ref.select_atoms("all").resids @@ -124,7 +126,6 @@ def test_capping_inmemory(self): assert_array_equal(peptide.trajectory.ts.dimensions, u.trajectory.ts.dimensions) - class TestMerge(TestCase): ext = "pdb" @@ -154,6 +155,12 @@ def test_merge(self): # Check that the output Universe has the same number of atoms as the # starting AtomGroups assert_equal(len(u0.atoms), (len(u1.atoms) + len(u2.atoms) + len(u3.atoms))) + # Check that the output Universe has the same number of residues and + # segments as the starting AtomGroups + assert_equal(len(u0.residues), (len(u1.residues) + len(u2.residues) + + len(u3.residues))) + assert_equal(len(u0.segments), (len(u1.segments) + len(u2.segments) + + len(u3.segments))) # Make sure that all the atoms in the new universe are assigned to only # one, new Universe @@ -177,6 +184,14 @@ def test_merge(self): ids_new2 = [a.index for a in u.atoms] assert_equal(ids_new, ids_new2) + def test_merge_same_universe(self): + u1, _, _ = self.universes + + u0 = MDAnalysis.Merge(u1.atoms, u1.atoms, u1.atoms) + assert_equal(len(u0.atoms), 3*len(u1.atoms)) + assert_equal(len(u0.residues), 3*len(u1.residues)) + assert_equal(len(u0.segments), 3*len(u1.segments)) + def test_residue_references(self): u1, u2, u3 = self.universes m = Merge(u1.atoms, u2.atoms) @@ -196,8 +211,8 @@ def test_nonsense_TypeError(self): assert_raises(TypeError, Merge, ['1', 2]) def test_emptyAG_ValueError(self): - a = AtomGroup([]) - b = AtomGroup([]) + a = AtomGroup([], None) + b = AtomGroup([], None) assert_raises(ValueError, Merge, a, b) @@ -205,9 +220,11 @@ class TestMergeTopology(object): """Test that Merge correct does topology""" def setUp(self): self.u = MDAnalysis.Universe(PSF, DCD) + self.u2 = MDAnalysis.Universe(merge_protein) def tearDown(self): del self.u + del self.u2 def 
test_merge_with_topology(self):
         ag1 = self.u.atoms[:20]
@@ -216,25 +233,36 @@ def test_merge_with_topology(self):
         u2 = MDAnalysis.Merge(ag1, ag2)
 
         assert_(len(u2.atoms) == 30)
-        assert_(len(u2.bonds) == 28)
-        assert_(len(u2.angles) == 47)
-        assert_(len(u2.dihedrals) == 53)
-        assert_(len(u2.impropers) == 1)
+        assert_(len(u2.atoms.bonds) == 28)
+        assert_(len(u2.atoms.angles) == 47)
+        assert_(len(u2.atoms.dihedrals) == 53)
+        assert_(len(u2.atoms.impropers) == 1)
 
         # All these bonds are in the merged Universe
         assert_(len(ag1[0].bonds) == len(u2.atoms[0].bonds))
         # One of these bonds isn't in the merged Universe
         assert_(len(ag2[0].bonds) -1 == len(u2.atoms[20].bonds))
 
+    def test_merge_with_topology_from_different_universes(self):
+        u3 = MDAnalysis.Merge(self.u.atoms[:110], self.u2.atoms)
+
+        # merge_protein doesn't contain bond topology, so merged universe
+        # shouldn't have one either
+        print(u3.atoms.bonds)
+        # PDB reader yields empty Bonds group, which means bonds from
+        # PSF/DCD survive the merge
+        #assert_(not hasattr(u3.atoms, 'bonds') or len(u3.atoms.bonds) == 0)
+        for name in ('angles', 'dihedrals', 'impropers'):
+            assert_(not hasattr(u3.atoms, name) or
+                    len(getattr(u3.atoms, name)) == 0)
+
     def test_merge_without_topology(self):
         # This shouldn't have topology as we merged single atoms
-        ag1 = AtomGroup([self.u.atoms[1]])
-        ag2 = AtomGroup([self.u.atoms[10]])
-
-        u2 = MDAnalysis.Merge(ag1, ag2)
+        u2 = MDAnalysis.Merge(self.u.atoms[0:1], self.u.atoms[10:11])
 
         assert_(len(u2.atoms) == 2)
-        assert_(len(u2.bonds) == 0)
-        assert_(len(u2.angles) == 0)
-        assert_(len(u2.dihedrals) == 0)
-        assert_(len(u2.impropers) == 0)
+        assert_(len(u2.atoms.bonds) == 0)
+        assert_(len(u2.atoms.angles) == 0)
+        assert_(len(u2.atoms.dihedrals) == 0)
+        assert_(len(u2.atoms.impropers) == 0)
+