Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Merge for issue-363 branch #781

Merged
merged 4 commits into from
Apr 10, 2016
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package/MDAnalysis/core/topology.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ class Topology(object):
"""

def __init__(self, n_atoms=1, n_res=1, n_seg=1,
attrs=None,
attrs=[],
atom_resindex=None,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No mutable keyword arguments are a bad idea. Google can tell you why.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

https://www.codementor.io/python/tutorial/essential-python-interview-questions

questions 6. The others are also a good read for people new to python.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for that. Definitely good to know -- I'll have to review the other points too at some point

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have a look at the Quantified Code report. It flagged this issue and provides links to explanations.

residue_segindex=None):
self.n_atoms = n_atoms
Expand Down
191 changes: 140 additions & 51 deletions package/MDAnalysis/core/universe.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from numpy.lib.utils import deprecate
import logging
import itertools

import MDAnalysis
from ..lib import util
Expand Down Expand Up @@ -496,7 +497,7 @@ def as_Universe(*args, **kwargs):

asUniverse = deprecate(as_Universe, old_name='asUniverse', new_name='as_Universe')

#TODO: UPDATE ME WITH NEW TOPOLOGY DETAILS

def Merge(*args):
"""Return a :class:`Universe` from two or more :class:`AtomGroup` instances.

Expand All @@ -506,16 +507,23 @@ def Merge(*args):
It can also be used with a single :class:`AtomGroup` if the user wants to,
for example, re-order the atoms in the Universe.

:Arguments: One or more :class:`AtomGroup` instances.
If multiple :class:`AtomGroup` instances from the same Universe are given,
the merge will first simply "add" together the :class:`AtomGroup` instances.

:Returns: an instance of :class:`~MDAnalaysis.AtomGroup.Universe`
Parameters
----------
args : One or more :class:`AtomGroup` instances.

Returns
-------
universe : An instance of :class:`~MDAnalaysis.AtomGroup.Universe`

:Raises: :exc:`ValueError` for too few arguments or if an AtomGroup is
empty and :exc:`TypeError` if arguments are not
:class:`AtomGroup` instances.

.. rubric:: Example

Example
-------
In this example, protein, ligand, and solvent were externally prepared in
three different PDB files. They are loaded into separate :class:`Universe`
objects (where they could be further manipulated, e.g. renumbered,
Expand All @@ -530,71 +538,152 @@ def Merge(*args):

The complete system is then written out to a new PDB file.

.. Note:: Merging does not create a full trajectory but only a single
structure even if the input consists of one or more trajectories.
Note
----
Merging does not create a full trajectory but only a single
structure even if the input consists of one or more trajectories.

.. versionchanged 0.9.0::
Raises exceptions instead of assertion errors.

"""
import MDAnalysis.topology.core

if len(args) == 0:
raise ValueError("Need at least one AtomGroup for merging")

for a in args:
if not isinstance(a, AtomGroup):
if not isinstance(a, groups.AtomGroup):
raise TypeError(repr(a) + " is not an AtomGroup")
for a in args:
if len(a) == 0:
raise ValueError("cannot merge empty AtomGroup")

coords = np.vstack([a.coordinates() for a in args])
# If any atom groups come from the same Universe, just add them
# together first
disjoint_atom_groups = []
already_added = []
for a, b in itertools.combinations(args, r=2):
if a in already_added and b in already_added:
continue
if a.universe is b.universe:
disjoint_atom_groups.append(a + b)
already_added.extend([a, b])
else:
if a not in already_added:
disjoint_atom_groups.append(a)
already_added.append(a)
if b not in already_added:
disjoint_atom_groups.append(b)
already_added.append(b)

u = Universe()
# Create a new topology using the intersection of topology attributes
blank_topology_attrs = set(dir(Topology(attrs=[])))
common_attrs = set.intersection(*[set(dir(ag.universe._topology))
for ag in disjoint_atom_groups])
topology_groups = set(['bonds', 'angles', 'dihedrals', 'impropers'])

# Create set of array-valued attributes which can be simply
# concatenated together
keep_attrs = common_attrs - blank_topology_attrs - topology_groups

attrs = []
dtypes = {}
for attrname in keep_attrs:
for ag in disjoint_atom_groups:
attr = getattr(ag, attrname)
type_attr = type(getattr(ag.universe._topology, attrname))
if type(attr) != np.ndarray:
raise TypeError('Encountered unexpected topology'+
'attribute of type {}'.format(
type(attr)))
try:
attr_array.extend(attr)
except NameError:
attr_array = list(attr)
attrs.append(type_attr(np.array(attr_array,
dtype=attr.dtype)))
del attr_array

# Build up topology groups
for tg in (topology_groups & common_attrs):
bondidx = []
types = []
offset = 0
for ag in disjoint_atom_groups:
bonds = getattr(ag, tg)
bond_class = type(getattr(ag.universe._topology, tg))
bondidx.extend(bonds.indices + offset)
if hasattr(bonds, '_bondtypes'):
types.extend(bonds.types())
else:
types.extend([None]*len(bonds))
offset += len(ag)
bondidx = np.array(bondidx, dtype=np.int32)
if any([t is None for t in types]):
attrs.append(bond_class(values))
else:
types = np.array(types, dtype='|S8')
attrs.append(bond_class(bondidx, types))

# Renumber residue and segment indices
n_atoms = sum([len(ag) for ag in disjoint_atom_groups])
residx = []
segidx = []
for ag in disjoint_atom_groups:
res_offset = len(set(residx))
resdict = {n: i+res_offset for i, n in enumerate(set(ag.resindices))}
seg_offset = len(set(segidx))
segdict = {n: i+len(set(segidx)) for i, n in enumerate(set(ag.segindices))}
residx.extend([resdict[n] for n in ag.resindices])
segidx.extend([segdict[n] for n in ag.segindices])

residx = np.array(residx, dtype=np.int32)
segidx = np.array(segidx, dtype=np.int32)

n_residues = len(set(residx))
n_segments = len(set(segidx))

top = Topology(n_atoms, n_residues, n_segments,
attrs=attrs,
atom_resindex=residx,
residue_segindex=segidx)

# Put topology in Universe
u._topology = top

# generate Universe version of each class
# AG, RG, SG, A, R, S
u._classes = groups.make_classes()

# Put Group level stuff from topology into class
for attr in u._topology.attrs:
u._process_attr(attr)

# Generate atoms, residues and segments
u.atoms = u._classes['atomgroup'](
np.arange(u._topology.n_atoms), u)

u.residues = u._classes['residuegroup'](
np.arange( u._topology.n_residues), u)

u.segments = u._classes['segmentgroup'](np.arange(
u._topology.n_segments), u)

# Update Universe namespace with segids
for seg in u.segments:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So with things like this, it's much better to move the original version into a method (something like _make_segid_namespace), and then call that method from the two places it gets called from. Then if we change how this works it gets updated everywhere

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since lines 184-209 and 646-671 are identical, I could put them in a universe._generate_from_topology().

if hasattr(seg, 'segid'):
if seg.segid[0].isdigit():
name = 's' + seg.segid
else:
name = seg.segid
u.__dict__[name] = seg

coords = np.vstack([a.positions for a in disjoint_atom_groups])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So is the order of the atoms as put into Merge preserved?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No - I was a bit silly implementing this the way I did, shouldn't be changing the behavior from the old Merge. I was trying to avoid thinking about how residue indices should be assigned to atoms in the following merges, if universes u and u2 each have 10 atoms belonging to 1 residue:

mda.Merge(u.atoms[:5], u.atoms[5:10])
mda.Merge(u.atoms, u.atoms)
mda.Merge(u.atoms, u2.atoms, u.atoms)

and tried avoiding the problem by adding together atomgroups from the same universe first, but I disrupted the order in the process.

So question - if atomgroups from the same universe belong to the same residue/segment pre-merge, should they have the same residue/segment post-merge, or they should have different residue/segment indices post-merge?

I'm taking for granted that residues from different universes pre-merge should not belong to the same residues (i.e. have the same residue indices) post-merge.

trajectory = MDAnalysis.coordinates.base.Reader(None)
ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords)
setattr(trajectory, "ts", ts)
trajectory.n_frames = 1

# create an empty Universe object
u = Universe()
u.trajectory = trajectory

# create a list of Atoms, then convert it to an AtomGroup
atoms = [copy.copy(a) for gr in args for a in gr]
for a in atoms:
a.universe = u

# adjust the atom numbering
for i, a in enumerate(atoms):
a.index = i
a.serial = i + 1
u.atoms = AtomGroup(atoms)

# move over the topology
offset = 0
tops = ['bonds', 'angles', 'dihedrals', 'impropers']
idx_lists = {t:[] for t in tops}
for ag in args:
# create a mapping scheme for this atomgroup
mapping = {a.index:i for i, a in enumerate(ag, start=offset)}
offset += len(ag)

for t in tops:
tg = getattr(ag, t)
# Create a topology group of only bonds that are within this ag
# ie we don't want bonds that extend out of the atomgroup
tg = tg.atomgroup_intersection(ag, strict=True)

# Map them so they refer to our new indices
new_idx = [tuple(map(lambda x:mapping[x], entry))
for entry in tg.to_indices()]
idx_lists[t].extend(new_idx)

for t in tops:
u._topology[t] = idx_lists[t]

# adjust the residue and segment numbering (removes any remaining references to the old universe)
MDAnalysis.topology.core.build_residues(u.atoms)
MDAnalysis.topology.core.build_segments(u.atoms)

return u