Skip to content

Commit

Permalink
Fixed incorrect handling of residue names with trailing numbers in Hy…
Browse files Browse the repository at this point in the history
…drogenBondAnalysis (issue #801)

- fixes #801
- analysis.hbond.hbond_analysis.HydrogenBondAnalysis now correctly stores and parses
  donor and acceptor names and is not tripped up by resnames that end in numbers, such
  as TIP3
- store timeseries as _timeseries with donor and acceptor atom identifiers as
  tuples instead of strings (avoids the use of fragile lib.util.parse_residue())
- made timeseries a property that is generated on the fly so that old code does not break, but
  it is not cached (to avoid memory consumption for big trajectories) and so users should cache
  themselves if needed
- added tests
- rewrote parts of the docs and added notes on use of timeseries
- updated DEPRECATION warning for 1-based indices to 0.17.0 (should have been removed in
  0.16.0 but we forgot)
  • Loading branch information
orbeckst committed May 11, 2017
1 parent 0764299 commit 2d5a191
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 54 deletions.
6 changes: 4 additions & 2 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ The rules for this file:
------------------------------------------------------------------------------

mm/dd/17 utkbansal, kain88-de, xiki-tempula, kaplajon, wouterboomsma,
richardjgowers, Shtkddud123, QuantumEntangledAndy
richardjgowers, Shtkddud123, QuantumEntangledAndy, orbeckst

* 0.16.1

Expand All @@ -38,7 +38,9 @@ Fixes
(PR #1325)
* Fix PDBParser docs for conect (issue #1246)
* Fixed bug where amber topology files would fail to load if number of atoms was
exectly divisible by number of atoms per line (issue #1331)
exactly divisible by number of atoms per line (issue #1331)
* Fixed incorrect handling of residue names with trailing numbers in
HydrogenBondAnalysis (issue #801)
* Fixed AnalysisBase class provides numerical start,stop,step values (PR #1340)

Changes
Expand Down
154 changes: 102 additions & 52 deletions package/MDAnalysis/analysis/hbonds/hbond_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,10 @@
MDAnalysis simply subtract 1. For instance, to find an atom in
:attr:`Universe.atoms` by *index* from the output one would use
``u.atoms[index-1]``.
.. deprecated:: 0.15.0
This feature is being deprecated in favor of zero-based indices and is targeted
for removal in 0.16.0.
The 1-based indices are being deprecated in favor of zero-based indices
and is targeted for removal in 0.17.0.
Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat
Expand Down Expand Up @@ -258,44 +259,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
:attr:`~HydrogenBondAnalysis.timeseries` to find the specific time point
of a hydrogen bond existence, or see :attr:`~HydrogenBondAnalysis.table`.
.. attribute:: timeseries
Results of the hydrogen bond analysis, stored for each frame. In
the following description, # indicates comments that are not
part of the output::
results = [
[ # frame 1
[ # hbond 1
<donor index (1-based)>, <acceptor index (1-based)>, <donor index (0-based)>,
<acceptor index (0-based)>, <donor string>, <acceptor string>,
<distance>, <angle>
],
[ # hbond 2
<donor index (1-based)>, <acceptor index (1-based)>, <donor index (0-based)>,
<acceptor index (0-based)>, <donor string>, <acceptor string>,
<distance>, <angle>
],
....
],
[ # frame 2
[ ... ], [ ... ], ...
],
...
]
The time of each step is not stored with each hydrogen bond frame but in
:attr:`~HydrogenBondAnalysis.timesteps`.
.. Note::
The *index* is a 1-based index. To get the :attr:`Atom.index` (the
0-based index typically used in MDAnalysis simply subtract 1. For
instance, to find an atom in :attr:`Universe.atoms` by *index* one
would use ``u.atoms[index-1]``.
.. attribute:: table
A normalised table of the data in
Expand Down Expand Up @@ -327,7 +290,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
0-based index typically used in MDAnalysis simply subtract 1. For
instance, to find an atom in :attr:`Universe.atoms` by *index* one
would use ``u.atoms[idx_zero]``. The 1-based index is deprecated and
targeted for removal in 0.16.0
targeted for removal in 0.17.0
Expand All @@ -341,7 +304,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
The donor and acceptor indices being 1-based is deprecated in favor of
a zero-based index. This can be accessed by "donor_index" or
"acceptor_index" removal of the 1-based indices is targeted
for version 0.16.0
for version 0.17.0
"""
from __future__ import division, absolute_import
Expand All @@ -354,7 +317,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
import logging

from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning
from MDAnalysis.lib.util import parse_residue
from MDAnalysis.lib.mdamath import norm, angle
from MDAnalysis.lib.log import ProgressMeter, _set_verbose
from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch
Expand Down Expand Up @@ -583,7 +545,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_
"The donor and acceptor indices being 1-based is deprecated in favor"
" of a zero-based index. These can be accessed by 'donor_index' or"
" 'acceptor_index', removal of the 1-based indices is targeted for"
" version 0.16.0", category=DeprecationWarning)
" version 0.17.0", category=DeprecationWarning)

self._get_bonded_hydrogens_algorithms = {
"distance": self._get_bonded_hydrogens_dist, # 0.7.6 default
Expand Down Expand Up @@ -858,7 +820,8 @@ def run(self, **kwargs):
Note
----
Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data into a different format.
Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data
into a different format.
.. versionchanged:: 0.7.6
Expand All @@ -875,7 +838,7 @@ def run(self, **kwargs):
The donor and acceptor indices being 1-based is deprecated in favor
of a zero-based index. This can be accessed by "donor_index" or
"acceptor_index" removal of the 1-based indices is targeted
for version 0.16.0
for version 0.17.0
.. deprecated:: 0.16
The *quiet* keyword argument is deprecated in favor of the *verbose*
Expand Down Expand Up @@ -1019,9 +982,96 @@ def calc_eucl_distance(a1, a2):

@property
def timeseries(self):
"""Time series of hydrogen bonds."""
"""Time series of hydrogen bonds.
The results of the hydrogen bond analysis can be accessed as a `list` of `list` of `list`:
1. `timeseries[i]`: data for the i-th trajectory frame (at time
`timesteps[i]`, see :attr:`timesteps`)
2. `timeseries[i][j]`: j-th hydrogen bond that was detected at the i-th
frame.
3. ``donor_idx, acceptor_idx, donor_index, acceptor_index,
donor_name_str, acceptor_name_str, distance, angle =
timeseries[i][j]``: structure of one hydrogen bond data item
In the following description, ``#`` indicates comments that are not
part of the output::
results = [
[ # frame 1
[ # hbond 1
<donor index (1-based)>, <acceptor index (1-based)>, <donor index (0-based)>,
<acceptor index (0-based)>, <donor string>, <acceptor string>,
<distance>, <angle>
],
[ # hbond 2
<donor index (1-based)>, <acceptor index (1-based)>, <donor index (0-based)>,
<acceptor index (0-based)>, <donor string>, <acceptor string>,
<distance>, <angle>
],
....
],
[ # frame 2
[ ... ], [ ... ], ...
],
...
]
The time of each step is not stored with each hydrogen bond frame but in
:attr:`~HydrogenBondAnalysis.timesteps`.
Note
----
The *index* is a 1-based index. To get the :attr:`Atom.index` (the
0-based index typically used in MDAnalysis simply subtract 1. For
instance, to find an atom in :attr:`Universe.atoms` by *index* one
would use ``u.atoms[index-1]``.
return self._timeseries
The :attr:`timeseries` is a managed attribute and it is generated from
the underlying data in :attr:`_timeseries` every time the attribute is
accessed. It is therefore costly to call and if :attr:`timeseries` is
needed repeatedly it is recommended that you assign to a variable::
h = HydrogenBondAnalysis(u)
h.run()
timeseries = h.timeseries
See Also
--------
HydrogenBondAnalysis.table : structured array of the data
.. versionchanged:: 0.16.1
:attr:`timeseries` has become a managed attribute and is generated from the stored
:attr:`_timeseries` when needed. :attr:`_timeseries` contains the donor atom and
acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings.
.. deprecated:: 0.15.0
The 1-based indices are being deprecated in favor of zero-based indices
and they are targeted for removal in 0.17.0.
.. deprecated:: 1.0
:attr:`timeseries` will be replaced/changed
"""
return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries]

@staticmethod
def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"):
"""Convert 0.16.1 _timeseries hbond item to 0.16.0 hbond item.
In 0.16.1, donor and acceptor are stored as a tuple(resname,
resid, atomid). In 0.16.0 and earlier they were stored as a string.
.. deprecated:: 1.0
"""
# change indices once we remove 1-based donor_idx and acceptor_idx
return (hb[:4]
+ [atomformat.format(hb[4]), atomformat.format(hb[5])]
+ hb[6:])

def generate_table(self):
"""Generate a normalised table of the results.
Expand Down Expand Up @@ -1129,8 +1179,8 @@ def count_by_type(self):
for hframe in self._timeseries:
for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
acceptor, distance, angle) in hframe:
donor_resnm, donor_resid, donor_atom = parse_residue(donor)
acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
donor_resnm, donor_resid, donor_atom = donor
acceptor_resnm, acceptor_resid, acceptor_atom = acceptor
# generate unambigous key for current hbond \
# (the donor_heavy_atom placeholder '?' is added later)
# idx_zero is redundant for an unambigous key, but included for
Expand Down Expand Up @@ -1190,8 +1240,8 @@ def timesteps_by_type(self):
for (t, hframe) in zip(self.timesteps, self._timeseries):
for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
acceptor, distance, angle) in hframe:
donor_resnm, donor_resid, donor_atom = parse_residue(donor)
acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor)
donor_resnm, donor_resid, donor_atom = donor
acceptor_resnm, acceptor_resid, acceptor_atom = acceptor
# generate unambigous key for current hbond
# (the donor_heavy_atom placeholder '?' is added later)
# idx_zero is redundant for key but added for consistency
Expand Down

0 comments on commit 2d5a191

Please sign in to comment.