-
Notifications
You must be signed in to change notification settings - Fork 648
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix Merge for issue-363 branch #781
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import numpy as np | ||
from numpy.lib.utils import deprecate | ||
import logging | ||
import itertools | ||
|
||
import MDAnalysis | ||
from ..lib import util | ||
|
@@ -496,7 +497,7 @@ def as_Universe(*args, **kwargs): | |
|
||
asUniverse = deprecate(as_Universe, old_name='asUniverse', new_name='as_Universe') | ||
|
||
#TODO: UPDATE ME WITH NEW TOPOLOGY DETAILS | ||
|
||
def Merge(*args): | ||
"""Return a :class:`Universe` from two or more :class:`AtomGroup` instances. | ||
|
||
|
@@ -506,16 +507,23 @@ def Merge(*args): | |
It can also be used with a single :class:`AtomGroup` if the user wants to, | ||
for example, re-order the atoms in the Universe. | ||
|
||
:Arguments: One or more :class:`AtomGroup` instances. | ||
If multiple :class:`AtomGroup` instances from the same Universe are given, | ||
the merge will first simply "add" together the :class:`AtomGroup` instances. | ||
|
||
:Returns: an instance of :class:`~MDAnalaysis.AtomGroup.Universe` | ||
Parameters | ||
---------- | ||
args : One or more :class:`AtomGroup` instances. | ||
|
||
Returns | ||
------- | ||
universe : An instance of :class:`~MDAnalaysis.AtomGroup.Universe` | ||
|
||
:Raises: :exc:`ValueError` for too few arguments or if an AtomGroup is | ||
empty and :exc:`TypeError` if arguments are not | ||
:class:`AtomGroup` instances. | ||
|
||
.. rubric:: Example | ||
|
||
Example | ||
------- | ||
In this example, protein, ligand, and solvent were externally prepared in | ||
three different PDB files. They are loaded into separate :class:`Universe` | ||
objects (where they could be further manipulated, e.g. renumbered, | ||
|
@@ -530,71 +538,152 @@ def Merge(*args): | |
|
||
The complete system is then written out to a new PDB file. | ||
|
||
.. Note:: Merging does not create a full trajectory but only a single | ||
structure even if the input consists of one or more trajectories. | ||
Note | ||
---- | ||
Merging does not create a full trajectory but only a single | ||
structure even if the input consists of one or more trajectories. | ||
|
||
.. versionchanged 0.9.0:: | ||
Raises exceptions instead of assertion errors. | ||
|
||
""" | ||
import MDAnalysis.topology.core | ||
|
||
if len(args) == 0: | ||
raise ValueError("Need at least one AtomGroup for merging") | ||
|
||
for a in args: | ||
if not isinstance(a, AtomGroup): | ||
if not isinstance(a, groups.AtomGroup): | ||
raise TypeError(repr(a) + " is not an AtomGroup") | ||
for a in args: | ||
if len(a) == 0: | ||
raise ValueError("cannot merge empty AtomGroup") | ||
|
||
coords = np.vstack([a.coordinates() for a in args]) | ||
# If any atom groups come from the same Universe, just add them | ||
# together first | ||
disjoint_atom_groups = [] | ||
already_added = [] | ||
for a, b in itertools.combinations(args, r=2): | ||
if a in already_added and b in already_added: | ||
continue | ||
if a.universe is b.universe: | ||
disjoint_atom_groups.append(a + b) | ||
already_added.extend([a, b]) | ||
else: | ||
if a not in already_added: | ||
disjoint_atom_groups.append(a) | ||
already_added.append(a) | ||
if b not in already_added: | ||
disjoint_atom_groups.append(b) | ||
already_added.append(b) | ||
|
||
u = Universe() | ||
# Create a new topology using the intersection of topology attributes | ||
blank_topology_attrs = set(dir(Topology(attrs=[]))) | ||
common_attrs = set.intersection(*[set(dir(ag.universe._topology)) | ||
for ag in disjoint_atom_groups]) | ||
topology_groups = set(['bonds', 'angles', 'dihedrals', 'impropers']) | ||
|
||
# Create set of array-valued attributes which can be simply | ||
# concatenated together | ||
keep_attrs = common_attrs - blank_topology_attrs - topology_groups | ||
|
||
attrs = [] | ||
dtypes = {} | ||
for attrname in keep_attrs: | ||
for ag in disjoint_atom_groups: | ||
attr = getattr(ag, attrname) | ||
type_attr = type(getattr(ag.universe._topology, attrname)) | ||
if type(attr) != np.ndarray: | ||
raise TypeError('Encountered unexpected topology'+ | ||
'attribute of type {}'.format( | ||
type(attr))) | ||
try: | ||
attr_array.extend(attr) | ||
except NameError: | ||
attr_array = list(attr) | ||
attrs.append(type_attr(np.array(attr_array, | ||
dtype=attr.dtype))) | ||
del attr_array | ||
|
||
# Build up topology groups | ||
for tg in (topology_groups & common_attrs): | ||
bondidx = [] | ||
types = [] | ||
offset = 0 | ||
for ag in disjoint_atom_groups: | ||
bonds = getattr(ag, tg) | ||
bond_class = type(getattr(ag.universe._topology, tg)) | ||
bondidx.extend(bonds.indices + offset) | ||
if hasattr(bonds, '_bondtypes'): | ||
types.extend(bonds.types()) | ||
else: | ||
types.extend([None]*len(bonds)) | ||
offset += len(ag) | ||
bondidx = np.array(bondidx, dtype=np.int32) | ||
if any([t is None for t in types]): | ||
attrs.append(bond_class(values)) | ||
else: | ||
types = np.array(types, dtype='|S8') | ||
attrs.append(bond_class(bondidx, types)) | ||
|
||
# Renumber residue and segment indices | ||
n_atoms = sum([len(ag) for ag in disjoint_atom_groups]) | ||
residx = [] | ||
segidx = [] | ||
for ag in disjoint_atom_groups: | ||
res_offset = len(set(residx)) | ||
resdict = {n: i+res_offset for i, n in enumerate(set(ag.resindices))} | ||
seg_offset = len(set(segidx)) | ||
segdict = {n: i+len(set(segidx)) for i, n in enumerate(set(ag.segindices))} | ||
residx.extend([resdict[n] for n in ag.resindices]) | ||
segidx.extend([segdict[n] for n in ag.segindices]) | ||
|
||
residx = np.array(residx, dtype=np.int32) | ||
segidx = np.array(segidx, dtype=np.int32) | ||
|
||
n_residues = len(set(residx)) | ||
n_segments = len(set(segidx)) | ||
|
||
top = Topology(n_atoms, n_residues, n_segments, | ||
attrs=attrs, | ||
atom_resindex=residx, | ||
residue_segindex=segidx) | ||
|
||
# Put topology in Universe | ||
u._topology = top | ||
|
||
# generate Universe version of each class | ||
# AG, RG, SG, A, R, S | ||
u._classes = groups.make_classes() | ||
|
||
# Put Group level stuff from topology into class | ||
for attr in u._topology.attrs: | ||
u._process_attr(attr) | ||
|
||
# Generate atoms, residues and segments | ||
u.atoms = u._classes['atomgroup']( | ||
np.arange(u._topology.n_atoms), u) | ||
|
||
u.residues = u._classes['residuegroup']( | ||
np.arange( u._topology.n_residues), u) | ||
|
||
u.segments = u._classes['segmentgroup'](np.arange( | ||
u._topology.n_segments), u) | ||
|
||
# Update Universe namespace with segids | ||
for seg in u.segments: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So with things like this, it's much better to move the original version into a method (something like There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since lines 184-209 and 646-671 are identical, I could put them in a |
||
if hasattr(seg, 'segid'): | ||
if seg.segid[0].isdigit(): | ||
name = 's' + seg.segid | ||
else: | ||
name = seg.segid | ||
u.__dict__[name] = seg | ||
|
||
coords = np.vstack([a.positions for a in disjoint_atom_groups]) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So is the order of the atoms as put into Merge preserved? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No - I was a bit silly implementing this the way I did, shouldn't be changing the behavior from the old Merge. I was trying to avoid thinking about how residue indices should be assigned to atoms in the following merges, if universes
and tried avoiding the problem by adding together atomgroups from the same universe first, but I disrupted the order in the process. So question - if atomgroups from the same universe belong to the same residue/segment pre-merge, should they have the same residue/segment post-merge, or they should have different residue/segment indices post-merge? I'm taking for granted that residues from different universes pre-merge should not belong to the same residues (i.e. have the same residue indices) post-merge. |
||
trajectory = MDAnalysis.coordinates.base.Reader(None) | ||
ts = MDAnalysis.coordinates.base.Timestep.from_coordinates(coords) | ||
setattr(trajectory, "ts", ts) | ||
trajectory.n_frames = 1 | ||
|
||
# create an empty Universe object | ||
u = Universe() | ||
u.trajectory = trajectory | ||
|
||
# create a list of Atoms, then convert it to an AtomGroup | ||
atoms = [copy.copy(a) for gr in args for a in gr] | ||
for a in atoms: | ||
a.universe = u | ||
|
||
# adjust the atom numbering | ||
for i, a in enumerate(atoms): | ||
a.index = i | ||
a.serial = i + 1 | ||
u.atoms = AtomGroup(atoms) | ||
|
||
# move over the topology | ||
offset = 0 | ||
tops = ['bonds', 'angles', 'dihedrals', 'impropers'] | ||
idx_lists = {t:[] for t in tops} | ||
for ag in args: | ||
# create a mapping scheme for this atomgroup | ||
mapping = {a.index:i for i, a in enumerate(ag, start=offset)} | ||
offset += len(ag) | ||
|
||
for t in tops: | ||
tg = getattr(ag, t) | ||
# Create a topology group of only bonds that are within this ag | ||
# ie we don't want bonds that extend out of the atomgroup | ||
tg = tg.atomgroup_intersection(ag, strict=True) | ||
|
||
# Map them so they refer to our new indices | ||
new_idx = [tuple(map(lambda x:mapping[x], entry)) | ||
for entry in tg.to_indices()] | ||
idx_lists[t].extend(new_idx) | ||
|
||
for t in tops: | ||
u._topology[t] = idx_lists[t] | ||
|
||
# adjust the residue and segment numbering (removes any remaining references to the old universe) | ||
MDAnalysis.topology.core.build_residues(u.atoms) | ||
MDAnalysis.topology.core.build_segments(u.atoms) | ||
|
||
return u |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
No mutable keyword arguments are a bad idea. Google can tell you why.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://www.codementor.io/python/tutorial/essential-python-interview-questions
questions 6. The others are also a good read for people new to python.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for that. Definitely good to know -- I'll have to review the other points too at some point
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Have a look at the Quantified Code report. It flagged this issue and provides links to explanations.