From c84cb4b7f45eaa9926ec0cd8f2d0383d6e9919e0 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 9 May 2017 14:03:03 -0700 Subject: [PATCH 1/6] test for correct TIP3P resname in hbond analysis table (#801) --- .../analysis/hbonds/hbond_analysis.py | 41 ++++---- .../MDAnalysisTests/analysis/test_hbonds.py | 96 ++++++++++++++++++- 2 files changed, 118 insertions(+), 19 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 3065dbfbad8..82375507a4b 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -620,7 +620,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ elif self.selection1_type not in ('both', 'donor', 'acceptor'): raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format(self.selection1_type)) - self.timeseries = None # final result + self._timeseries = None # final result accessed as self.timeseries self.timesteps = None # time for each frame self.table = None # placeholder for output table @@ -898,7 +898,7 @@ def run(self, **kwargs): if not self.debug: logger.debug("HBond analysis: For full step-by-step debugging output use debug=True") - self.timeseries = [] + self._timeseries = [] self.timesteps = [] logger.info("checking trajectory...") # n_frames can take a while! @@ -968,8 +968,8 @@ def _get_timestep(): #self.logger_debug("S1-D: %r <-> S2-A: %r %f A, %f DEG" % (h, a, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, - '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name), - '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name), + (h.resname, h.resid, h.name), + (a.resname, a.resid, a.name), dist, angle]) already_found[(h.index + 1, a.index + 1)] = True @@ -994,11 +994,11 @@ def _get_timestep(): #self.logger_debug("S1-A: %r <-> S2-D: %r %f A, %f DEG" % (a, h, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, - '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name), - '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name), + (h.resname, h.resid, h.name), + (a.resname, a.resid, a.name), dist, angle]) - self.timeseries.append(frame_results) + self._timeseries.append(frame_results) logger.info("HBond analysis: complete; timeseries with %d hbonds in %s.timeseries", self.count_by_time().count.sum(), self.__class__.__name__) @@ -1017,6 +1017,12 @@ def calc_eucl_distance(a1, a2): """Calculate the Euclidean distance between two atoms. """ return norm(a2.position - a1.position) + @property + def timeseries(self): + """Time series of hydrogen bonds.""" + + return self._timeseries + def generate_table(self): """Generate a normalised table of the results. @@ -1042,13 +1048,13 @@ def generate_table(self): .. _recsql: http://pypi.python.org/pypi/RecSQL """ - if self.timeseries is None: + if self._timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) logger.warn(msg) return - num_records = np.sum([len(hframe) for hframe in self.timeseries]) + num_records = np.sum([len(hframe) for hframe in self._timeseries]) # build empty output table dtype = [ ("time", float), ("donor_idx", int), ("acceptor_idx", int), @@ -1060,11 +1066,12 @@ def generate_table(self): # and speedups of ~x10 can be achieved by filling a standard array, like this: out = np.empty((num_records,), dtype=dtype) cursor = 0 # current row - for t, hframe in zip(self.timesteps, self.timeseries): + for t, hframe in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, acceptor, distance, angle) in hframe: + # donor|acceptor = (resname, resid, atomid) out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \ - parse_residue(donor) + parse_residue(acceptor) + (distance, angle) + donor + acceptor + (distance, angle) cursor += 1 assert cursor == num_records, "Internal Error: Not all HB records stored" self.table = out.view(np.recarray) @@ -1089,7 +1096,7 @@ def count_by_time(self): :Returns: a class:`numpy.recarray` """ - if self.timeseries is None: + if self._timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) logger.warn(msg) @@ -1097,7 +1104,7 @@ def count_by_time(self): out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)]) for cursor, time_count in enumerate(zip(self.timesteps, - (len(series) for series in self.timeseries))): + (len(series) for series in self._timeseries))): out[cursor] = time_count return out.view(np.recarray) @@ -1112,14 +1119,14 @@ def count_by_type(self): :Returns: a class:`numpy.recarray` """ - if self.timeseries is None: + if self._timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) logger.warn(msg) return hbonds = defaultdict(int) - for hframe in self.timeseries: + for hframe in self._timeseries: for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, acceptor, distance, angle) in hframe: donor_resnm, donor_resid, donor_atom = parse_residue(donor) @@ -1173,14 +1180,14 @@ def timesteps_by_type(self): :Returns: a class:`numpy.recarray` """ - if self.timeseries is None: + if self._timeseries is None: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) logger.warn(msg) return hbonds = defaultdict(list) - for (t, hframe) in zip(self.timesteps, self.timeseries): + for (t, hframe) in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, acceptor, distance, angle) in hframe: donor_resnm, donor_resid, donor_atom = parse_residue(donor) diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py index aca854145f2..2e8d1082d46 100644 --- a/testsuite/MDAnalysisTests/analysis/test_hbonds.py +++ b/testsuite/MDAnalysisTests/analysis/test_hbonds.py @@ -26,14 +26,17 @@ from MDAnalysis import SelectionError, SelectionWarning from numpy.testing import (assert_, assert_equal, assert_array_equal, - assert_raises) + assert_almost_equal, assert_array_almost_equal, + assert_raises, dec) import numpy as np import itertools import warnings from six import StringIO -from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC +from MDAnalysisTests import parser_not_found +from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC, waterPSF, waterDCD + # For type guessing: from MDAnalysis.topology.core import guess_atom_type from MDAnalysis.core.topologyattrs import Atomtypes @@ -226,3 +229,92 @@ def run_HBA_dynamic_selections(*args): yield run_HBA_dynamic_selections, s1, s2, s1type finally: self._tearDown() + + +class TestHydrogenBondAnalysisTIP3P(object): + @dec.skipif(parser_not_found('DCD'), + 'DCD parser not available. Are you using python 3?') + def setUp(self): + self.universe = u = MDAnalysis.Universe(waterPSF, waterDCD) + self.kwargs = { + 'selection1': 'all', + 'selection2': 'all', + 'detect_hydrogens': "distance", + 'distance': 3.0, + 'angle': 120.0, + } + self.h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, **self.kwargs) + self.h.run(verbose=False) + self.h.generate_table() + self.normalized_timeseries = self._normalize_timeseries() + + # keys are the names in the h.table + self.reference = { + 'distance': {'mean': 2.0208776, 'std': 0.31740859}, + 'angle': {'mean': 155.13521, 'std': 12.98955}, + } + + # reference values for the table only + self.reference_table = { + 'donor_resnm': ["TIP3"] * len(self.normalized_timeseries), + 'acceptor_resnm': ["TIP3"] * len(self.normalized_timeseries), + } + + # index into timeseries (ADJUST ONCE donor_idx and acceptor_ndx are removed) + # with keys being field names in h.table + self.columns = { + 'time': 0, + 'donor_idx': 1, + 'acceptor_idx': 2, + 'donor_index': 3, + 'acceptor_index': 4, + 'distance': 7, + 'angle': 8, + } + + # hackish way to allow looping over self.reference and generating tests + self._functions = { + 'mean': np.mean, + 'std': np.std, + } + + def _normalize_timeseries(self): + # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle) + # array index: 0 1 2 3 4 5 6 7 8 + timeseries = [[t] + item + for t, hframe in zip(self.h.timesteps, self.h.timeseries) + for item in hframe] + return timeseries + + def test_timeseries(self): + h = self.h + assert_equal(len(h.timeseries), 10) + assert_equal(len(self.normalized_timeseries), 29) + + for observable in self.reference: + idx = self.columns[observable] + for quantity, reference in self.reference[observable].items(): + func = self._functions[quantity] + assert_almost_equal( + func([item[idx] for item in self.normalized_timeseries]), reference, + decimal=5, + err_msg="{quantity}({observable}) does not match reference".format(**vars())) + + def test_table_atoms(self): + h = self.h + table = h.table + + assert_equal(len(h.table), len(self.normalized_timeseries)) + + # test that timeseries and table agree on index data and + # hydrogen bond information at atom level + for name, idx in self.columns.items(): + assert_array_almost_equal(table.field(name), [data[idx] for data in self.normalized_timeseries], + err_msg="table[{name}] and timeseries[{idx} do not agree".format(**vars())) + + # test at residue level (issue #801 + # https://github.com/MDAnalysis/mdanalysis/issues/801) + for name, ref in self.reference_table.items(): + assert_array_equal(h.table.field(name), ref, + err_msg="resname for {0} do not match (Issue #801)") + From bad97b9d3f24dcbd5235396f4b4c291c044323fb Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Tue, 9 May 2017 17:10:54 -0700 Subject: [PATCH 2/6] Fixed incorrect handling of residue names with trailing numbers in HydrogenBondAnalysis (issue #801) - fixes #801 - analysis.hbond.hbond_analysis.HydrogenBondAnalysis now correctly stores and parses donor and acceptor names and is not tripped up by resnames that end in numbers, such as TIP3 - store timeseries as _timeseries with donor and acceptor atom identifiers as tuples instead of strings (avoids the use of fragile lib.util.parse_residue()) - made timeseries a property that is generated on the fly so that old code does not break, but it is not cached (to avoid memory consumption for big trajectories) and so users should cache themselves if needed - added tests - rewrote parts of the docs and added notes on use of timeseries - updated DEPRECATION warning for 1-based indices to 0.17.0 (should have been removed in 0.16.0 but we forgot) --- package/CHANGELOG | 6 +- .../analysis/hbonds/hbond_analysis.py | 154 ++++++++++++------ 2 files changed, 106 insertions(+), 54 deletions(-) diff --git a/package/CHANGELOG b/package/CHANGELOG index abe47f11d5c..5caa76fca08 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -15,7 +15,7 @@ The rules for this file: ------------------------------------------------------------------------------ mm/dd/17 utkbansal, kain88-de, xiki-tempula, kaplajon, wouterboomsma, - richardjgowers, Shtkddud123, QuantumEntangledAndy + richardjgowers, Shtkddud123, QuantumEntangledAndy, orbeckst * 0.16.1 @@ -38,7 +38,9 @@ Fixes (PR #1325) * Fix PDBParser docs for conect (issue #1246) * Fixed bug where amber topology files would fail to load if number of atoms was - exectly divisible by number of atoms per line (issue #1331) + exactly divisible by number of atoms per line (issue #1331) + * Fixed incorrect handling of residue names with trailing numbers in + HydrogenBondAnalysis (issue #801) * Fixed AnalysisBase class provides numerical start,stop,step values (PR #1340) Changes diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 82375507a4b..19c47bedbe1 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -89,9 +89,10 @@ MDAnalysis simply subtract 1. For instance, to find an atom in :attr:`Universe.atoms` by *index* from the output one would use ``u.atoms[index-1]``. + .. deprecated:: 0.15.0 - This feature is being deprecated in favor of zero-based indices and is targeted - for removal in 0.16.0. + The 1-based indices are being deprecated in favor of zero-based indices + and is targeted for removal in 0.17.0. Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat @@ -258,44 +259,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): :attr:`~HydrogenBondAnalysis.timeseries` to find the specific time point of a hydrogen bond existence, or see :attr:`~HydrogenBondAnalysis.table`. - .. attribute:: timeseries - - Results of the hydrogen bond analysis, stored for each frame. In - the following description, # indicates comments that are not - part of the output:: - - results = [ - [ # frame 1 - [ # hbond 1 - , , , - , , , - , - ], - [ # hbond 2 - , , , - , , , - , - ], - .... - ], - [ # frame 2 - [ ... ], [ ... ], ... - ], - ... - ] - - The time of each step is not stored with each hydrogen bond frame but in - :attr:`~HydrogenBondAnalysis.timesteps`. - - .. Note:: - - The *index* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis simply subtract 1. For - instance, to find an atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[index-1]``. - - - .. attribute:: table A normalised table of the data in @@ -327,7 +290,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): 0-based index typically used in MDAnalysis simply subtract 1. For instance, to find an atom in :attr:`Universe.atoms` by *index* one would use ``u.atoms[idx_zero]``. The 1-based index is deprecated and - targeted for removal in 0.16.0 + targeted for removal in 0.17.0 @@ -341,7 +304,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): The donor and acceptor indices being 1-based is deprecated in favor of a zero-based index. This can be accessed by "donor_index" or "acceptor_index" removal of the 1-based indices is targeted - for version 0.16.0 + for version 0.17.0 """ from __future__ import division, absolute_import @@ -354,7 +317,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): import logging from MDAnalysis import MissingDataWarning, NoDataError, SelectionError, SelectionWarning -from MDAnalysis.lib.util import parse_residue from MDAnalysis.lib.mdamath import norm, angle from MDAnalysis.lib.log import ProgressMeter, _set_verbose from MDAnalysis.lib.NeighborSearch import AtomNeighborSearch @@ -583,7 +545,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ "The donor and acceptor indices being 1-based is deprecated in favor" " of a zero-based index. These can be accessed by 'donor_index' or" " 'acceptor_index', removal of the 1-based indices is targeted for" - " version 0.16.0", category=DeprecationWarning) + " version 0.17.0", category=DeprecationWarning) self._get_bonded_hydrogens_algorithms = { "distance": self._get_bonded_hydrogens_dist, # 0.7.6 default @@ -858,7 +820,8 @@ def run(self, **kwargs): Note ---- - Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data into a different format. + Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data + into a different format. .. versionchanged:: 0.7.6 @@ -875,7 +838,7 @@ def run(self, **kwargs): The donor and acceptor indices being 1-based is deprecated in favor of a zero-based index. This can be accessed by "donor_index" or "acceptor_index" removal of the 1-based indices is targeted - for version 0.16.0 + for version 0.17.0 .. deprecated:: 0.16 The *quiet* keyword argument is deprecated in favor of the *verbose* @@ -1019,9 +982,96 @@ def calc_eucl_distance(a1, a2): @property def timeseries(self): - """Time series of hydrogen bonds.""" + """Time series of hydrogen bonds. + + The results of the hydrogen bond analysis can be accessed as a `list` of `list` of `list`: + + 1. `timeseries[i]`: data for the i-th trajectory frame (at time + `timesteps[i]`, see :attr:`timesteps`) + 2. `timeseries[i][j]`: j-th hydrogen bond that was detected at the i-th + frame. + 3. ``donor_idx, acceptor_idx, donor_index, acceptor_index, + donor_name_str, acceptor_name_str, distance, angle = + timeseries[i][j]``: structure of one hydrogen bond data item + + + In the following description, ``#`` indicates comments that are not + part of the output:: + + results = [ + [ # frame 1 + [ # hbond 1 + , , , + , , , + , + ], + [ # hbond 2 + , , , + , , , + , + ], + .... + ], + [ # frame 2 + [ ... ], [ ... ], ... + ], + ... + ] + + The time of each step is not stored with each hydrogen bond frame but in + :attr:`~HydrogenBondAnalysis.timesteps`. + + + Note + ---- + The *index* is a 1-based index. To get the :attr:`Atom.index` (the + 0-based index typically used in MDAnalysis simply subtract 1. For + instance, to find an atom in :attr:`Universe.atoms` by *index* one + would use ``u.atoms[index-1]``. - return self._timeseries + The :attr:`timeseries` is a managed attribute and it is generated from + the underlying data in :attr:`_timeseries` every time the attribute is + accessed. It is therefore costly to call and if :attr:`timeseries` is + needed repeatedly it is recommended that you assign to a variable:: + + h = HydrogenBondAnalysis(u) + h.run() + timeseries = h.timeseries + + See Also + -------- + HydrogenBondAnalysis.table : structured array of the data + + + .. versionchanged:: 0.16.1 + :attr:`timeseries` has become a managed attribute and is generated from the stored + :attr:`_timeseries` when needed. :attr:`_timeseries` contains the donor atom and + acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings. + + .. deprecated:: 0.15.0 + The 1-based indices are being deprecated in favor of zero-based indices + and they are targeted for removal in 0.17.0. + + .. deprecated:: 1.0 + :attr:`timeseries` will be replaced/changed + + """ + return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries] + + @staticmethod + def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"): + """Convert 0.16.1 _timeseries hbond item to 0.16.0 hbond item. + + In 0.16.1, donor and acceptor are stored as a tuple(resname, + resid, atomid). In 0.16.0 and earlier they were stored as a string. + + .. deprecated:: 1.0 + + """ + # change indices once we remove 1-based donor_idx and acceptor_idx + return (hb[:4] + + [atomformat.format(hb[4]), atomformat.format(hb[5])] + + hb[6:]) def generate_table(self): """Generate a normalised table of the results. @@ -1129,8 +1179,8 @@ def count_by_type(self): for hframe in self._timeseries: for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = parse_residue(donor) - acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor) + donor_resnm, donor_resid, donor_atom = donor + acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond \ # (the donor_heavy_atom placeholder '?' is added later) # idx_zero is redundant for an unambigous key, but included for @@ -1190,8 +1240,8 @@ def timesteps_by_type(self): for (t, hframe) in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, acceptor, distance, angle) in hframe: - donor_resnm, donor_resid, donor_atom = parse_residue(donor) - acceptor_resnm, acceptor_resid, acceptor_atom = parse_residue(acceptor) + donor_resnm, donor_resid, donor_atom = donor + acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond # (the donor_heavy_atom placeholder '?' is added later) # idx_zero is redundant for key but added for consistency From 634a18dbc26d5b91bfc5d41b737befacb336abe9 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 10 May 2017 12:23:24 -0700 Subject: [PATCH 3/6] hbond analysis doc fixes/improvements - mostly numpy style (whenever possible): Apparently, napoleon does not like a single Notes and See Also section, need to use reST. - named the 1-based indices "idx" in the docs. - added example for analysis - describe convenience analysis functions - how to use pandas --- .../analysis/hbonds/hbond_analysis.py | 481 +++++++++++------- 1 file changed, 287 insertions(+), 194 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 19c47bedbe1..76d48b4bd98 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -65,12 +65,12 @@ results = [ [ # frame 1 [ # hbond 1 - , , , + , , , , , , , ], [ # hbond 2 - , , , + , , , , , , , ], @@ -84,21 +84,23 @@ .. Note:: - For historic reasons, the *donor index* and *acceptor index* are a 1-based - indices. To get the :attr:`Atom.index` (the 0-based index typically used in - MDAnalysis simply subtract 1. For instance, to find an atom in - :attr:`Universe.atoms` by *index* from the output one would use - ``u.atoms[index-1]``. + For historic reasons, the output contains 1-based indices (named *donor idx* + and *acceptor idx*) in addition to 0-based indices (named *donor index* and + *acceptor index*). To get the :attr:`Atom.index` (the 0-based index + typically used in MDAnalysis), use the *index* values (or subtract 1 from + *idx*). For instance, to find an atom in :attr:`Universe.atoms` by *index* + from the output one would use ``u.atoms[index]``. .. deprecated:: 0.15.0 The 1-based indices are being deprecated in favor of zero-based indices - and is targeted for removal in 0.17.0. + and are targeted for removal in 0.17.0. Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat the results as a flat "normalised" table that is easier to import into a -database for further processing. :meth:`HydrogenBondAnalysis.save_table` saves -the table to a pickled file. The table itself is a :class:`numpy.recarray`. +database or dataframe for further +processing. :meth:`HydrogenBondAnalysis.save_table` saves the table to a +pickled file. The table itself is a :class:`numpy.recarray`. Detection of hydrogen bonds @@ -107,12 +109,12 @@ Hydrogen bonds are recorded based on a geometric criterion: 1. The distance between acceptor and hydrogen is less than or equal to - *distance* (default is 3 Å). + `distance` (default is 3 Å). 2. The angle between donor-hydrogen-acceptor is greater than or equal to - *angle* (default is 120º). + `angle` (default is 120º). -The cut-off values *angle* and *distance* can be set as keywords to +The cut-off values `angle` and `distance` can be set as keywords to :class:`HydrogenBondAnalysis`. Donor and acceptor heavy atoms are detected from atom names. The current @@ -120,19 +122,17 @@ in Table `Default atom names for hydrogen bonding analysis`_. Hydrogen atoms bonded to a donor are searched with one of two algorithms, -selected with the *detect_hydrogens* keyword. - -*distance* +selected with the `detect_hydrogens` keyword. +"distance" Searches for all hydrogens (name "H*" or name "[123]H" or type "H") in the same residue as the donor atom within a cut-off distance of 1.2 Å. -*heuristic* - +"heuristic" Looks at the next three atoms in the list of atoms following the donor and selects any atom whose name matches (name "H*" or name "[123]H"). For -The *distance* search is more rigorous but slower and is set as the +The "distance" search is more rigorous but slower and is set as the default. Until release 0.7.6, only the heuristic search was implemented. .. versionchanged:: 0.7.6 @@ -186,16 +186,16 @@ donors/acceptors for residues such as histidine or cytosine. For more information about the naming convention in GLYCAM06 have a look at the -`Carbohydrate Naming Convention in Glycam`. +`Carbohydrate Naming Convention in Glycam`_. .. _`Carbohydrate Naming Convention in Glycam`: http://glycam.ccrc.uga.edu/documents/FutureNomenclature.htm The lists of donor and acceptor names can be extended by providing lists of -atom names in the *donors* and *acceptors* keywords to +atom names in the `donors` and `acceptors` keywords to :class:`HydrogenBondAnalysis`. If the lists are entirely inappropriate (e.g. when analysing simulations done with a force field that uses very -different atom names) then one should either use the value "other" for *forcefield* +different atom names) then one should either use the value "other" for `forcefield` to set no default values, or derive a new class and set the default list oneself:: class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): @@ -203,7 +203,7 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): DEFAULT_ACCEPTORS = {"OtherFF": tuple(set([...]))} Then simply use the new class instead of the parent class and call it with -*forcefield* = "OtherFF". Please also consider to contribute the list of heavy +`forcefield` = "OtherFF". Please also consider to contribute the list of heavy atom names to MDAnalysis. .. rubric:: References @@ -224,27 +224,55 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): import MDAnalysis.analysis.hbonds u = MDAnalysis.Universe('topology', 'trajectory') - h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'protein', distance=3.0, angle=120.0) + h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(u, 'protein', 'resname HOH', distance=3.0, angle=120.0) h.run() -The results are stored as the attribute -:attr:`HydrogenBondAnalysis.timeseries`; see :ref:`Analysis Output` -for the format and further options. - .. Note:: Due to the way :class:`HydrogenBondAnalysis` is implemented, it is - more efficient to have the second selection (*selection2*) be the + more efficient to have the second selection (`selection2`) be the *larger* group, e.g. the water when looking at water-protein H-bonds or the whole protein when looking at ligand-protein interactions. -.. Note:: - The topology supplied and the trajectory must reflect the same total number - of atoms. +The results are stored as the attribute +:attr:`HydrogenBondAnalysis.timeseries`; see :ref:`Analysis Output` for the +format and further options. + +A number of convenience functions are provided to process the +:attr:`~HydrogenBondAnalysis.timeseries` according to varying criteria: + +:meth:`~HydrogenBondAnalysis.count_by_time` + time series of the number of hydrogen bonds per time step +:meth:`~HydrogenBondAnalysis.count_by_type` + data structure with the frequency of each observed hydrogen bond +:meth:`~HydrogenBondAnalysis.timesteps_by_type` + data structure with a list of time steps for each observed hydrogen bond + +For further data analysis it is convenient to process the +:attr:`~HydrogenBondAnalysis.timeseries` data into a normalized table with the +:meth:`~HydrogenBondAnalysis.generate_table` method, which creates a new data +structure :attr:`HydrogenBondAnalysis.table` that contains one row for each +observation of a hydrogen bond:: + + h.generate_table() + +This table can then be easily turned into, e.g., a `pandas.DataFrame`_, and +further analyzed:: + + import pandas as pd + df = pd.DataFrame.from_records(h.table) + +For example, plotting a histogram of the hydrogen bond angles and lengths is as +simple as :: + + df.hist(column=["angle", "distance"]) -.. TODO: how to analyse the ouput and notes on selection updating +.. _pandas.DataFrame: http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.html + + +.. TODO: notes on selection updating Classes @@ -280,19 +308,30 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): 11. "distance" 12. "angle" - It takes up more space than - :attr:`~HydrogenBondAnalysis.timeseries` but it is easier to - analyze and to import into databases (e.g. using recsql_). + It takes up more space than :attr:`~HydrogenBondAnalysis.timeseries` but + it is easier to analyze and to import into databases or dataframes. + + + .. rubric:: Example + + For example, to create a `pandas.DataFrame`_ from ``h.table``:: + + import pandas as pd + df = pd.DataFrame.from_records(h.table) .. Note:: - The *index* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis simply subtract 1. For - instance, to find an atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[idx_zero]``. The 1-based index is deprecated and - targeted for removal in 0.17.0 + Each index variable named *idx* is a 1-based index. To get the :attr:`Atom.index` (the + 0-based index typically used in MDAnalysis) simply subtract 1, or better, use the 0-based variables named *index*. + For instance, to find an acceptor atom in :attr:`Universe.atoms` by *index* one + would use ``u.atoms[acceptor_index]``. + .. deprecated:: 0.15.0 + The donor and acceptor indices being 1-based is deprecated in favor of + a zero-based index. The 0-based indices can be accessed by + *donor_index* or "acceptor_index"; removal of the 1-based indices is + targeted for version 0.17.0 .. automethod:: _get_bonded_hydrogens @@ -300,12 +339,6 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): .. automethod:: _get_bonded_hydrogens_list - .. deprecated:: 0.15.0 - The donor and acceptor indices being 1-based is deprecated in favor of - a zero-based index. This can be accessed by "donor_index" or - "acceptor_index" removal of the 1-based indices is targeted - for version 0.17.0 - """ from __future__ import division, absolute_import import six @@ -364,9 +397,9 @@ class HydrogenBondAnalysis(object): # table; set() takes care for removing duplicates. At the end the # DEFAULT_DONORS and DEFAULT_ACCEPTORS should simply be tuples. - # : default heavy atom names whose hydrogens are treated as *donors* - # : (see :ref:`Default atom names for hydrogen bonding analysis`) - #: Use the keyword *donors* to add a list of additional donor names. + #: default heavy atom names whose hydrogens are treated as *donors* + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `donors` to add a list of additional donor names. DEFAULT_DONORS = { 'CHARMM27': tuple(set([ 'N', 'OH2', 'OW', 'NE', 'NH1', 'NH2', 'ND2', 'SG', 'NE2', 'ND1', 'NZ', 'OG', 'OG1', 'NE1', 'OH'])), @@ -374,8 +407,8 @@ class HydrogenBondAnalysis(object): 'other': tuple(set([]))} #: default atom names that are treated as hydrogen *acceptors* - #: (see :ref:`Default atom names for hydrogen bonding analysis`) - #: Use the keyword *acceptors* to add a list of additional acceptor names. + #: (see :ref:`Default atom names for hydrogen bonding analysis`); + #: use the keyword `acceptors` to add a list of additional acceptor names. DEFAULT_ACCEPTORS = { 'CHARMM27': tuple(set([ 'O', 'OH2', 'OW', 'OD1', 'OD2', 'SG', 'OE1', 'OE1', 'OE2', 'ND1', 'NE2', 'SD', 'OG', 'OG1', 'OH'])), @@ -401,35 +434,19 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ The timeseries is accessible as the attribute :attr:`HydrogenBondAnalysis.timeseries`. Some initial checks are performed. If there are no atoms selected by - *selection1* or *selection2* or if no donor hydrogens or acceptor atoms + `selection1` or `selection2` or if no donor hydrogens or acceptor atoms are found then a :exc:`SelectionError` is raised for any selection that - does *not* update (*update_selection1* and *update_selection2* + does *not* update (`update_selection1` and `update_selection2` keywords). For selections that are set to update, only a warning is logged because it is assumed that the selection might contain atoms at a later frame (e.g. for distance based selections). If no hydrogen bonds are detected or if the initial check fails, look at the log output (enable with :func:`MDAnalysis.start_logging` and set - *verbose* = ``True``). It is likely that the default names for donors + `verbose` ``=True``). It is likely that the default names for donors and acceptors are not suitable (especially for non-standard - ligands). In this case, either change the *forcefield* or use - customized *donors* and/or *acceptors*. - - .. Note:: - - In order to speed up processing, atoms are filtered by a coarse - distance criterion before a detailed hydrogen bonding analysis is - performed (*filter_first* = ``True``). If one of your selections is - e.g. the solvent then *update_selection1* (or *update_selection2*) - must also be ``True`` so that the list of candidate atoms is updated - at each step: this is now the default. - - If your selections will essentially remain the same for all time - steps (i.e. residues are not moving farther than 3 x *distance*), for - instance, if no water or large conformational changes are involved - or if the optimization is disabled (*filter_first* = ``False``) then - you can improve performance by setting the *update_selection* - keywords to ``False``. + ligands). In this case, either change the `forcefield` or use + customized `donors` and/or `acceptors`. Parameters ---------- @@ -441,31 +458,31 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ Selection string for second selection ['all'] selection1_type : str (optional) Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the - value for *selection1_type* automatically determines how - *selection2* handles donors and acceptors: If *selection1* contains - 'both' then *selection2* will also contain *both*. If *selection1* - is set to 'donor' then *selection2* is 'acceptor' (and vice versa). + value for `selection1_type` automatically determines how + `selection2` handles donors and acceptors: If `selection1` contains + 'both' then `selection2` will also contain *both*. If `selection1` + is set to 'donor' then `selection2` is 'acceptor' (and vice versa). ['both']. update_selection1 : bool (optional) Update selection 1 at each frame? [``False``] update_selection2 : bool (optional) Update selection 2 at each frame? [``False``] filter_first : bool (optional) - Filter selection 2 first to only atoms 3*distance away [``True``] + Filter selection 2 first to only atoms 3 * `distance` away [``True``] distance : float (optional) Distance cutoff for hydrogen bonds; only interactions with a H-A distance - <= *distance* (and the appropriate D-H-A angle, see *angle*) are - recorded. (Note: *distance_type* can change this to the D-A distance.) [3.0] + <= `distance` (and the appropriate D-H-A angle, see `angle`) are + recorded. (Note: `distance_type` can change this to the D-A distance.) [3.0] angle : float (optional) Angle cutoff for hydrogen bonds; an ideal H-bond has an angle of 180º. A hydrogen bond is only recorded if the D-H-A angle is - >= *angle*. The default of 120º also finds fairly non-specific + >= `angle`. The default of 120º also finds fairly non-specific hydrogen interactions and a possibly better value is 150º. [120.0] forcefield : {"CHARMM27", "GLYCAM06", "other"} (optional) Name of the forcefield used. Switches between different :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS` and :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS` values. - Available values: "CHARMM27", "GLYCAM06", "other" ["CHARMM27"] + ["CHARMM27"] donors : sequence (optional) Extra H donor atom types (in addition to those in :attr:`~HydrogenBondAnalysis.DEFAULT_DONORS`), must be a sequence. @@ -474,18 +491,13 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ :attr:`~HydrogenBondAnalysis.DEFAULT_ACCEPTORS`), must be a sequence. start : int (optional) starting frame-index for analysis, ``None`` is the first one, 0. - *start* and *stop* are 0-based frame indices and are used to slice + `start` and `stop` are 0-based frame indices and are used to slice the trajectory (if supported) [``None``] stop : int (optional) last trajectory frame for analysis, ``None`` is the last one [``None``] step : int (optional) - read every *step* between *start* and *stop*, ``None`` selects 1. - Note that not all trajectory reader from 1 [``None``] - debug : bool (optional) - If set to ``True`` enables per-frame debug logging. This is disabled - by default because it generates a very large amount of output in - the log file. (Note that a logger must have been started to see - the output, e.g. using :func:`MDAnalysis.start_logging`.) + read every `step` between `start` (included) and `stop` (excluded), + ``None`` selects 1. [``None``] detect_hydrogens : {"distance", "heuristic"} (optional) Determine the algorithm to find hydrogens connected to donor atoms. Can be "distance" (default; finds all hydrogens in the @@ -499,6 +511,14 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ attoms ("heavy") or between donor hydrogen and acceptor heavy atom ("hydrogen"). If using "heavy" then one should set the *distance* cutoff to a higher value such as 3.5 Å. ["hydrogen"] + debug : bool (optional) + If set to ``True`` enables per-frame debug logging. This is disabled + by default because it generates a very large amount of output in + the log file. (Note that a logger must have been started to see + the output, e.g. using :func:`MDAnalysis.start_logging`.) + verbose : bool (optional) + Toggle progress output. (Can also be given as keyword argument to + :meth:`run`.) Raises ------ @@ -506,28 +526,44 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ is raised for each static selection without the required donors and/or acceptors. + Notes + ----- + In order to speed up processing, atoms are filtered by a coarse + distance criterion before a detailed hydrogen bonding analysis is + performed (`filter_first` = ``True``). If one of your selections is + e.g. the solvent then `update_selection1` (or `update_selection2`) must + also be ``True`` so that the list of candidate atoms is updated at each + step: this is now the default. + + If your selections will essentially remain the same for all time steps + (i.e. residues are not moving farther than 3 x `distance`), for + instance, if neither water nor large conformational changes are + involved or if the optimization is disabled (`filter_first` = + ``False``) then you can improve performance by setting the + `update_selection1` and/or `update_selection2` keywords to ``False``. + .. versionchanged:: 0.7.6 - New *verbose* keyword (and per-frame debug logging disabled by + New `verbose` keyword (and per-frame debug logging disabled by default). - New *detect_hydrogens* keyword to switch between two different + New `detect_hydrogens` keyword to switch between two different algorithms to detect hydrogens bonded to donor. "distance" is a new, rigorous distance search within the residue of the donor atom, "heuristic" is the previous list scan (improved with an additional distance check). - New *forcefield* keyword to switch between different values of + New `forcefield` keyword to switch between different values of DEFAULT_DONORS/ACCEPTORS to accomodate different force fields. Also has an option "other" for no default values. .. versionchanged:: 0.8 - The new default for *update_selection1* and *update_selection2* is now + The new default for `update_selection1` and `update_selection2` is now ``True`` (see `Issue 138`_). Set to ``False`` if your selections only need to be determined once (will increase performance). .. versionchanged:: 0.9.0 - New keyword *distance_type* to select between calculation between + New keyword `distance_type` to select between calculation between heavy atoms or hydrogen-acceptor. It defaults to the previous behavior (i.e. "hydrogen"). @@ -535,11 +571,12 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ Initial checks for selections that potentially raise :exc:`SelectionError`. .. deprecated:: 0.16 - The *verbose* keyword argument is replaced by *debug*. Note that the - *verbose* keyword argument is now comsistently used to toggle - progress meters throuthout the library. + The previous `verbose` keyword argument was replaced by + `debug`. Note that the `verbose` keyword argument is now + consistently used to toggle progress meters throughout the library. .. _`Issue 138`: https://github.com/MDAnalysis/mdanalysis/issues/138 + """ warnings.warn( "The donor and acceptor indices being 1-based is deprecated in favor" @@ -660,15 +697,25 @@ def _log_parameters(self): logger.info("HBond analysis: bonded hydrogen detection algorithm: %r", self.detect_hydrogens) def _get_bonded_hydrogens(self, atom, **kwargs): - """Find hydrogens bonded to *atom*. + """Find hydrogens bonded to `atom`. This method is typically not called by a user but it is documented to facilitate understanding of the internals of :class:`HydrogenBondAnalysis`. - :Returns: list of hydrogens (can be a - :class:`~MDAnalysis.core.groups.AtomGroup`) or empty list - ``[]`` if none were found. + Parameters + ---------- + atom : groups.Atom + heavy atom + **kwargs + passed through to the calculation method that was selected with + the `detect_hydrogens` kwarg of :class:`HydrogenBondAnalysis`. + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. See Also -------- @@ -677,30 +724,47 @@ def _get_bonded_hydrogens(self, atom, **kwargs): .. versionchanged:: 0.7.6 - Can switch algorithm by using the *detect_hydrogens* keyword to the + Can switch algorithm by using the `detect_hydrogens` keyword to the constructor. *kwargs* can be used to supply arguments for algorithm. + """ return self._get_bonded_hydrogens_algorithms[self.detect_hydrogens](atom, **kwargs) def _get_bonded_hydrogens_dist(self, atom): - """Find hydrogens bonded within *cutoff* to *atom*. + """Find hydrogens bonded within cutoff to `atom`. - * hydrogens are detected by either name ("H*", "[123]H*") or type - ("H"); this is not fool-proof as the atom type is not always a - character but the name pattern should catch most typical occurrences. + Hydrogens are detected by either name ("H*", "[123]H*") or type ("H"); + this is not fool-proof as the atom type is not always a character but + the name pattern should catch most typical occurrences. - * The distance from *atom* is calculated for all hydrogens in the - residue and only those within a cutoff are kept. The cutoff depends - on the heavy atom (more precisely, on its element, which is taken as - the first letter of its name ``atom.name[0]``) and is parameterized - in :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the - default of 1.5 Å is used. + The distance from `atom` is calculated for all hydrogens in the residue + and only those within a cutoff are kept. The cutoff depends on the + heavy atom (more precisely, on its element, which is taken as the first + letter of its name ``atom.name[0]``) and is parameterized in + :attr:`HydrogenBondAnalysis.r_cov`. If no match is found then the + default of 1.5 Å is used. + + Parameters + ---------- + atom : groups.Atom + heavy atom + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + + Notes + ----- The performance of this implementation could be improved once the topology always contains bonded information; it currently uses the selection parser with an "around" selection. + .. versionadded:: 0.7.6 + """ try: return atom.residue.atoms.select_atoms( @@ -722,6 +786,20 @@ def _get_bonded_hydrogens_list(self, atom, **kwargs): depends on the heavy atom and is parameterized in :attr:`HydrogenBondAnalysis.r_cov`. + Parameters + ---------- + atom : groups.Atom + heavy atom + **kwargs + ignored + + Returns + ------- + hydrogen_atoms : AtomGroup or [] + list of hydrogens (can be a :class:`~MDAnalysis.core.groups.AtomGroup`) + or empty list ``[]`` if none were found. + + .. versionchanged:: 0.7.6 Added detection of ``[123]H`` and additional check that a @@ -813,25 +891,37 @@ def run(self, **kwargs): :attr:`HydrogenBondAnalysis.timeseries` (see there for output format). - The method accepts a number of keywords, amongst them *verbose* - (default ``True``), which toggles the porgress output (see - :class:`~MDAnalysis.lib.log.ProgressMeter`) and *debug* which can - be used to change the debug value provided to the class constructor. + Parameters + ---------- + verbose : bool (optional) + toggle progress meter output :class:`~MDAnalysis.lib.log.ProgressMeter` + [``True``] + debug : bool (optional) + enable detailed logging of debugging information; this can create + *very big* log files so it is disable (``False``) by default; setting + `debug` toggles the debug status for :class:`HydrogenBondAnalysis`, + namely the value of :attr:`HydrogenBondAnalysis.debug`. - Note - ---- - Use :meth:`HydrogenBondAnalysis.generate_table` for processing the data - into a different format. + Other Parameters + ---------------- + remove_duplicates : bool (optional) + duplicate hydrogen bonds are removed from output if set to the + default value ``True``; normally, this should not be changed. + + See Also + -------- + :meth:`HydrogenBondAnalysis.generate_table` : + processing the data into a different format. .. versionchanged:: 0.7.6 Results are not returned, only stored in :attr:`~HydrogenBondAnalysis.timeseries` and duplicate hydrogen bonds - are removed from output (can be suppressed with *remove_duplicates* = + are removed from output (can be suppressed with `remove_duplicates` = ``False``) .. versionchanged:: 0.11.0 - Accept *quiet* keyword. Analysis will now proceed through frames even if + Accept `quiet` keyword. Analysis will now proceed through frames even if no donors or acceptors were found in a particular frame. .. deprecated:: 0.15.0 @@ -841,9 +931,9 @@ def run(self, **kwargs): for version 0.17.0 .. deprecated:: 0.16 - The *quiet* keyword argument is deprecated in favor of the *verbose* - one. Previous use of *verbose* now corresponds to the new keyword - argument *debug*. + The `quiet` keyword argument is deprecated in favor of the `verbose` + one. Previous use of `verbose` now corresponds to the new keyword + argument `debug`. """ logger.info("HBond analysis: starting") @@ -963,8 +1053,8 @@ def _get_timestep(): self._timeseries.append(frame_results) - logger.info("HBond analysis: complete; timeseries with %d hbonds in %s.timeseries", - self.count_by_time().count.sum(), self.__class__.__name__) + logger.info("HBond analysis: complete; timeseries %s.timeseries", + self.__class__.__name__) @staticmethod def calc_angle(d, h, a): @@ -1001,12 +1091,12 @@ def timeseries(self): results = [ [ # frame 1 [ # hbond 1 - , , , + , , , , , , , ], [ # hbond 2 - , , , + , , , , , , , ], @@ -1024,23 +1114,28 @@ def timeseries(self): Note ---- - The *index* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis simply subtract 1. For - instance, to find an atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[index-1]``. + Each index variable named *idx* is a 1-based index. To get the + :attr:`Atom.index` (the 0-based index typically used in MDAnalysis) + simply subtract 1, or better, use the 0-based variables named + *index*. - The :attr:`timeseries` is a managed attribute and it is generated from - the underlying data in :attr:`_timeseries` every time the attribute is - accessed. It is therefore costly to call and if :attr:`timeseries` is - needed repeatedly it is recommended that you assign to a variable:: + For instance, to find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. + + The :attr:`timeseries` is a managed attribute and it is generated + from the underlying data in :attr:`_timeseries` every time the + attribute is accessed. It is therefore costly to call and if + :attr:`timeseries` is needed repeatedly it is recommended that you + assign to a variable:: + + h = HydrogenBondAnalysis(u) + h.run() + timeseries = h.timeseries - h = HydrogenBondAnalysis(u) - h.run() - timeseries = h.timeseries See Also -------- - HydrogenBondAnalysis.table : structured array of the data + :attr:`table` : structured array of the data .. versionchanged:: 0.16.1 @@ -1048,12 +1143,9 @@ def timeseries(self): :attr:`_timeseries` when needed. :attr:`_timeseries` contains the donor atom and acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings. - .. deprecated:: 0.15.0 - The 1-based indices are being deprecated in favor of zero-based indices - and they are targeted for removal in 0.17.0. - - .. deprecated:: 1.0 - :attr:`timeseries` will be replaced/changed + .. deprecated:: 0.15.0 + The 1-based indices are being deprecated in favor of zero-based indices + and they are targeted for removal in 0.17.0. """ return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries] @@ -1077,26 +1169,12 @@ def generate_table(self): """Generate a normalised table of the results. The table is stored as a :class:`numpy.recarray` in the - attribute :attr:`~HydrogenBondAnalysis.table` and can be used - with e.g. `recsql`_. - - Columns: - 0. "time" - 1. "donor_idx" - 2. "acceptor_idx" - 3. "donor_index" - 4. "acceptor_index" - 5. "donor_resnm" - 6. "donor_resid" - 7. "donor_atom" - 8. "acceptor_resnm" - 9. "acceptor_resid" - 10. "acceptor_atom" - 11. "distance" - 12. "angle" + attribute :attr:`~HydrogenBondAnalysis.table`. + See Also + -------- + HydrogenBondAnalysis.table - .. _recsql: http://pypi.python.org/pypi/RecSQL """ if self._timeseries is None: msg = "No timeseries computed, do run() first." @@ -1130,6 +1208,16 @@ def generate_table(self): def save_table(self, filename="hbond_table.pickle"): """Saves :attr:`~HydrogenBondAnalysis.table` to a pickled file. + If :attr:`~HydrogenBondAnalysis.table` does not exist yet, + :meth:`generate_table` is called first. + + Parameters + ---------- + filename : str (optional) + path to the filename + + Example + ------- Load with :: import cPickle @@ -1140,16 +1228,22 @@ def save_table(self, filename="hbond_table.pickle"): self.generate_table() cPickle.dump(self.table, open(filename, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) + def _has_timeseries(self): + has_timeseries = (self._timeseries is not None) + if not has_timeseries: + msg = "No timeseries computed, do run() first." + warnings.warn(msg, category=MissingDataWarning) + logger.warn(msg) + return has_timeseries + def count_by_time(self): """Counts the number of hydrogen bonds per timestep. - :Returns: a class:`numpy.recarray` + Returns + ------- + numpy.recarray """ - - if self._timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + if not self._has_timeseries(): return out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)]) @@ -1161,18 +1255,17 @@ def count_by_time(self): def count_by_type(self): """Counts the frequency of hydrogen bonds of a specific type. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns - a :class:`numpy.recarray` containing atom indices, residue - names, residue numbers (for donors and acceptors) and the - fraction of the total time during which the hydrogen bond was - detected. + Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a + :class:`numpy.recarray` containing atom indices, residue names, residue + numbers (for donors and acceptors) and the fraction of the total time + during which the hydrogen bond was detected. + + Returns + ------- + numpy.recarray - :Returns: a class:`numpy.recarray` """ - if self._timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + if not self._has_timeseries(): return hbonds = defaultdict(int) @@ -1222,18 +1315,17 @@ def count_by_type(self): def timesteps_by_type(self): """Frames during which each hydrogen bond existed, sorted by hydrogen bond. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns - a :class:`numpy.recarray` containing atom indices, residue - names, residue numbers (for donors and acceptors) and a list - of timesteps at which the hydrogen bond was detected. + Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a + :class:`numpy.recarray` containing atom indices, residue names, residue + numbers (for donors and acceptors) and a list of timesteps at which the + hydrogen bond was detected. - :Returns: a class:`numpy.recarray` - """ + Returns + ------- + numpy.recarray - if self._timeseries is None: - msg = "No timeseries computed, do run() first." - warnings.warn(msg, category=MissingDataWarning) - logger.warn(msg) + """ + if not self._has_timeseries(): return hbonds = defaultdict(list) @@ -1292,7 +1384,7 @@ def _donor_lookup_table_byres(self): * selections have not changed (because we are simply looking at the last content of the donors and donor hydrogen lists) - Donors from *selection1* and *selection2* are merged. + Donors from `selection1` and `selection2` are merged. Output dictionary ``h2donor`` can be used as:: @@ -1330,7 +1422,7 @@ def _donor_lookup_table_byindex(self): * selections have not changed (because we are simply looking at the last content of the donors and donor hydrogen lists) - Donors from *selection1* and *selection2* are merged. + Donors from `selection1` and `selection2` are merged. Output dictionary ``h2donor`` can be used as:: @@ -1343,6 +1435,7 @@ def _donor_lookup_table_byindex(self): tables generated by :class:`HydrogenBondAnalysis` contain 1-based indices and zero-based indices. + .. deprecated:: 0.15.0 The 1-based indices are deprecated in favor of the zero-based indices given by "idx_zero". From 2cf191a9b111771324e98e7823dc4d5a11432c7c Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 10 May 2017 17:59:19 -0700 Subject: [PATCH 4/6] HydrogneBondAnalysis: internal clean up Use atom.index instead of atom.index+1 internally and for debug output. --- .../analysis/hbonds/hbond_analysis.py | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 76d48b4bd98..ee811a35d60 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -970,16 +970,13 @@ def run(self, **kwargs): try: self.u.trajectory.time - def _get_timestep(): return self.u.trajectory.time - logger.debug("HBond analysis is recording time step") except NotImplementedError: # chained reader or xyz(?) cannot do time yet def _get_timestep(): return self.u.trajectory.frame - logger.warn("HBond analysis is recording frame number instead of time step") logger.info("Starting analysis (frame index start=%d stop=%d, step=%d)", @@ -989,7 +986,7 @@ def _get_timestep(): for progress, ts in enumerate(self.u.trajectory[self.traj_slice]): # all bonds for this timestep frame_results = [] - # dict of tuples (atomid, atomid) for quick check if + # dict of tuples (atom.index, atom.index) for quick check if # we already have the bond (to avoid duplicates) already_found = {} @@ -1017,15 +1014,14 @@ def _get_timestep(): dist = self.calc_eucl_distance(donor_atom, a) if angle >= self.angle and dist <= self.distance: self.logger_debug( - "S1-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG".format(h.index + 1, a.index + 1, dist, angle)) - #self.logger_debug("S1-D: %r <-> S2-A: %r %f A, %f DEG" % (h, a, dist, angle)) + "S1-D: {0!s} <-> S2-A: {1!s} {2:f} A, {3:f} DEG".format(h.index, a.index, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, (h.resname, h.resid, h.name), (a.resname, a.resid, a.name), dist, angle]) - already_found[(h.index + 1, a.index + 1)] = True + already_found[(h.index, a.index)] = True if self.selection1_type in ('acceptor', 'both') and self._s1_acceptors: self.logger_debug("Selection 1 Acceptors <-> Donors") ns_acceptors = AtomNeighborSearch(self._s1_acceptors) @@ -1035,16 +1031,15 @@ def _get_timestep(): res = ns_acceptors.search(h, self.distance) for a in res: if remove_duplicates and ( - (h.index + 1, a.index + 1) in already_found - or (a.index + 1, h.index + 1) in already_found): + (h.index, a.index) in already_found or + (a.index, h.index) in already_found): continue angle = self.calc_angle(d, h, a) donor_atom = h if self.distance_type != 'heavy' else d dist = self.calc_eucl_distance(donor_atom, a) if angle >= self.angle and dist <= self.distance: self.logger_debug( - "S1-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG".format(a.index + 1, h.index + 1, dist, angle)) - #self.logger_debug("S1-A: %r <-> S2-D: %r %f A, %f DEG" % (a, h, dist, angle)) + "S1-A: {0!s} <-> S2-D: {1!s} {2:f} A, {3:f} DEG".format(a.index, h.index, dist, angle)) frame_results.append( [h.index + 1, a.index + 1, h.index, a.index, (h.resname, h.resid, h.name), From 245560e704a21c2e92e0c6df98635756eb9aad94 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Wed, 10 May 2017 18:19:45 -0700 Subject: [PATCH 5/6] Hbond analysis: fixed docs (stated wrong defaults for updating) --- package/MDAnalysis/analysis/hbonds/hbond_analysis.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index ee811a35d60..38a6729dbc2 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -456,17 +456,19 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ Selection string for first selection ['protein'] selection2 : str (optional) Selection string for second selection ['all'] - selection1_type : str (optional) + selection1_type : {"donor", "acceptor", "both"} (optional) Selection 1 can be 'donor', 'acceptor' or 'both'. Note that the value for `selection1_type` automatically determines how `selection2` handles donors and acceptors: If `selection1` contains - 'both' then `selection2` will also contain *both*. If `selection1` + 'both' then `selection2` will also contain 'both'. If `selection1` is set to 'donor' then `selection2` is 'acceptor' (and vice versa). ['both']. update_selection1 : bool (optional) - Update selection 1 at each frame? [``False``] + Update selection 1 at each frame? Setting to ``False`` is recommended + for any static selection to increase performance. [``True``] update_selection2 : bool (optional) - Update selection 2 at each frame? [``False``] + Update selection 2 at each frame? Setting to ``False`` is recommended + for any static selection to increase performance. [``True``] filter_first : bool (optional) Filter selection 2 first to only atoms 3 * `distance` away [``True``] distance : float (optional) From ba47e1fdf87f2ab80d1e54159b970dee7d420ba6 Mon Sep 17 00:00:00 2001 From: Oliver Beckstein Date: Fri, 12 May 2017 15:56:48 -0700 Subject: [PATCH 6/6] hbond analysis: addressed review comments --- .../analysis/hbonds/hbond_analysis.py | 134 ++++++++++++------ 1 file changed, 89 insertions(+), 45 deletions(-) diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py index 38a6729dbc2..7aa82072ea3 100644 --- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py +++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py @@ -92,8 +92,9 @@ from the output one would use ``u.atoms[index]``. .. deprecated:: 0.15.0 - The 1-based indices are being deprecated in favor of zero-based indices - and are targeted for removal in 0.17.0. + The 1-based indices "donor_idx" and "acceptor_idx" are being deprecated + in favor of zero-based indices "donor_index" and "acceptor_index" and are + targeted for removal in 0.17.0. Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat @@ -321,17 +322,20 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis): .. Note:: - Each index variable named *idx* is a 1-based index. To get the :attr:`Atom.index` (the - 0-based index typically used in MDAnalysis) simply subtract 1, or better, use the 0-based variables named *index*. + The index variables *donor_idx* and *acceptor_idx* are 1-based + indices. To get the :attr:`Atom.index` (the 0-based index typically + used in MDAnalysis) simply subtract 1, or better, use the 0-based + variables named *donor_index* and *acceptor_index*. + + For instance, to find an acceptor atom in :attr:`Universe.atoms` by + *index* one would use ``u.atoms[acceptor_index]``. - For instance, to find an acceptor atom in :attr:`Universe.atoms` by *index* one - would use ``u.atoms[acceptor_index]``. .. deprecated:: 0.15.0 - The donor and acceptor indices being 1-based is deprecated in favor of - a zero-based index. The 0-based indices can be accessed by - *donor_index* or "acceptor_index"; removal of the 1-based indices is - targeted for version 0.17.0 + The 1-based donor and acceptor indices (*donor_idx* and + *acceptor_idx*) are deprecated in favor of 0-based indices. The + 0-based indices can be accessed by *donor_index* and *acceptor_index*; + removal of the 1-based indices is targeted for version 0.17.0 .. automethod:: _get_bonded_hydrogens @@ -585,6 +589,29 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_ " of a zero-based index. These can be accessed by 'donor_index' or" " 'acceptor_index', removal of the 1-based indices is targeted for" " version 0.17.0", category=DeprecationWarning) + # REMOVAL (DEPRECATION) instructions for 0.17.0: "1-based indices" (aka "idx" vs 0-based "index") + # - remove the warning + # - replace 'deprecated 0.15.0' with 'versionchanged 0.17.0' (and adjust text) + # - update docs for "timeseries" by removing all "idx (1-based)" entries: + # one hydrogen bond should now look like + # , , , , + # , + # - update docs for "table": removed *_idx and renumber + # - in run(): removed the 1-based indices (h.index + 1, a.index + 1) from frame_results: + # for instance, should now read (two occurences!) + # frame_results.append( + # [h.index, a.index, + # (h.resname, h.resid, h.name), + # (a.resname, a.resid, a.name), + # dist, angle]) + # - _reformat_hb(): change indices hb[:4] --> hb[:2] + # - generate_table(), count_*(), timesteps_by_type(): + # remove any donor_idx, acceptor_idx variables and fields. + # - update tests (change indices in analysis/test_hbonds.py) + # - update any docs in this file that mention "1-based" + # + # I suggest to keep this removal a separate commit. [@orbeckst] + self._get_bonded_hydrogens_algorithms = { "distance": self._get_bonded_hydrogens_dist, # 0.7.6 default @@ -900,7 +927,7 @@ def run(self, **kwargs): [``True``] debug : bool (optional) enable detailed logging of debugging information; this can create - *very big* log files so it is disable (``False``) by default; setting + *very big* log files so it is disabled (``False``) by default; setting `debug` toggles the debug status for :class:`HydrogenBondAnalysis`, namely the value of :attr:`HydrogenBondAnalysis.debug`. @@ -926,12 +953,6 @@ def run(self, **kwargs): Accept `quiet` keyword. Analysis will now proceed through frames even if no donors or acceptors were found in a particular frame. - .. deprecated:: 0.15.0 - The donor and acceptor indices being 1-based is deprecated in favor - of a zero-based index. This can be accessed by "donor_index" or - "acceptor_index" removal of the 1-based indices is targeted - for version 0.17.0 - .. deprecated:: 0.16 The `quiet` keyword argument is deprecated in favor of the `verbose` one. Previous use of `verbose` now corresponds to the new keyword @@ -1111,10 +1132,11 @@ def timeseries(self): Note ---- - Each index variable named *idx* is a 1-based index. To get the - :attr:`Atom.index` (the 0-based index typically used in MDAnalysis) - simply subtract 1, or better, use the 0-based variables named - *index*. + The index variables named *donor_idx* and *acceptor_idx* are 1-based + indices, which were used historically. To get the :attr:`Atom.index` + (the 0-based index typically used in MDAnalysis) simply subtract 1, or + better, use the 0-based variables named *donor_index* and + *acceptor_index*. For instance, to find an acceptor atom in :attr:`Universe.atoms` by *index* one would use ``u.atoms[acceptor_index]``. @@ -1141,8 +1163,9 @@ def timeseries(self): acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings. .. deprecated:: 0.15.0 - The 1-based indices are being deprecated in favor of zero-based indices - and they are targeted for removal in 0.17.0. + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of the 0-based indices "donor_index" and + "acceptor_index" and they are targeted for removal in 0.17.0. """ return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries] @@ -1155,9 +1178,14 @@ def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"): resid, atomid). In 0.16.0 and earlier they were stored as a string. .. deprecated:: 1.0 + This is a compatibility layer so that we can provide the same output + in timeseries as before. However, for 1.0 we should make timeseries + just return _timeseries, i.e., change the format of timeseries to + the un-ambiguous representation provided in _timeseries. """ - # change indices once we remove 1-based donor_idx and acceptor_idx + # change indices once we remove 1-based donor_idx and acceptor_idx: + # change indices hb[:4]... --> hb[:2], hb[2], hb[3], hb[4:] return (hb[:4] + [atomformat.format(hb[4]), atomformat.format(hb[5])] + hb[6:]) @@ -1193,7 +1221,7 @@ def generate_table(self): cursor = 0 # current row for t, hframe in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: + acceptor, distance, angle) in hframe: # donor|acceptor = (resname, resid, atomid) out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \ donor + acceptor + (distance, angle) @@ -1226,7 +1254,7 @@ def save_table(self, filename="hbond_table.pickle"): cPickle.dump(self.table, open(filename, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL) def _has_timeseries(self): - has_timeseries = (self._timeseries is not None) + has_timeseries = self._timeseries is not None if not has_timeseries: msg = "No timeseries computed, do run() first." warnings.warn(msg, category=MissingDataWarning) @@ -1236,9 +1264,17 @@ def _has_timeseries(self): def count_by_time(self): """Counts the number of hydrogen bonds per timestep. + Processes :attr:`HydrogenBondAnalysis._timeseries` into the time series + ``N(t)`` where ``N`` is the total number of observed hydrogen bonds at + time ``t``. + Returns ------- - numpy.recarray + counts : numpy.recarray + The resulting array can be thought of as rows ``(time, N)`` where + ``time`` is the time (in ps) of the time step and ``N`` is the + total number of hydrogen bonds. + """ if not self._has_timeseries(): return @@ -1252,15 +1288,23 @@ def count_by_time(self): def count_by_type(self): """Counts the frequency of hydrogen bonds of a specific type. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a + Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a :class:`numpy.recarray` containing atom indices, residue names, residue numbers (for donors and acceptors) and the fraction of the total time during which the hydrogen bond was detected. Returns ------- - numpy.recarray + counts : numpy.recarray + Each row of the array contains data to define a unique hydrogen + bond together with the frequency (fraction of the total time) that + it has been observed. + + .. deprecated:: 0.15.0 + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of zero-based indices and they are targeted for + removal in 0.17.0. """ if not self._has_timeseries(): return @@ -1268,7 +1312,7 @@ def count_by_type(self): hbonds = defaultdict(int) for hframe in self._timeseries: for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: + acceptor, distance, angle) in hframe: donor_resnm, donor_resid, donor_atom = donor acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond \ @@ -1312,14 +1356,25 @@ def count_by_type(self): def timesteps_by_type(self): """Frames during which each hydrogen bond existed, sorted by hydrogen bond. - Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a + Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a :class:`numpy.recarray` containing atom indices, residue names, residue - numbers (for donors and acceptors) and a list of timesteps at which the + numbers (for donors and acceptors) and each timestep at which the hydrogen bond was detected. + In principle, this is the same as :attr:`~HydrogenBondAnalysis.table` + but sorted by hydrogen bond and with additional data for the + *donor_heavy_atom* and angle and distance omitted. + + Returns ------- - numpy.recarray + data : numpy.recarray + + + .. deprecated:: 0.15.0 + The 1-based indices "donor_idx" and "acceptor_idx" are being + deprecated in favor of zero-based indices and they are targeted for + removal in 0.17.0. """ if not self._has_timeseries(): @@ -1328,7 +1383,7 @@ def timesteps_by_type(self): hbonds = defaultdict(list) for (t, hframe) in zip(self.timesteps, self._timeseries): for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor, - acceptor, distance, angle) in hframe: + acceptor, distance, angle) in hframe: donor_resnm, donor_resid, donor_atom = donor acceptor_resnm, acceptor_resid, acceptor_atom = acceptor # generate unambigous key for current hbond @@ -1425,17 +1480,6 @@ def _donor_lookup_table_byindex(self): heavy_atom_name = h2donor[index] - Note - ---- - *index* is the 0-based MDAnalysis index - (:attr:`MDAnalysis.core.groups.Atom.index`). The - tables generated by :class:`HydrogenBondAnalysis` contain - 1-based indices and zero-based indices. - - - .. deprecated:: 0.15.0 - The 1-based indices are deprecated in favor of the zero-based indices - given by "idx_zero". """ s1d = self._s1_donors # list of donor Atom instances s1h = self._s1_donors_h # dict indexed by donor position in donor list, containg AtomGroups of H