test for correct TIP3P resname in hbond analysis table (#801)

MDAnalysis · May 10, 2017 · 0764299 · 0764299
1 parent 1c8f87d
commit 0764299
Show file tree

Hide file tree

Showing 2 changed files with 118 additions and 19 deletions.
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -620,7 +620,7 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_
         elif self.selection1_type not in ('both', 'donor', 'acceptor'):
             raise ValueError('HydrogenBondAnalysis: Invalid selection type {0!s}'.format(self.selection1_type))
 
-        self.timeseries = None  # final result
+        self._timeseries = None  # final result accessed as self.timeseries
         self.timesteps = None  # time for each frame
 
         self.table = None  # placeholder for output table
@@ -898,7 +898,7 @@ def run(self, **kwargs):
         if not self.debug:
             logger.debug("HBond analysis: For full step-by-step debugging output use debug=True")
 
-        self.timeseries = []
+        self._timeseries = []
         self.timesteps = []
 
         logger.info("checking trajectory...")  # n_frames can take a while!
@@ -968,8 +968,8 @@ def _get_timestep():
                                 #self.logger_debug("S1-D: %r <-> S2-A: %r %f A, %f DEG" % (h, a, dist, angle))
                                 frame_results.append(
                                     [h.index + 1, a.index + 1, h.index, a.index,
-                                    '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name),
-                                    '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name),
+                                    (h.resname, h.resid, h.name),
+                                    (a.resname, a.resid, a.name),
                                     dist, angle])
 
                                 already_found[(h.index + 1, a.index + 1)] = True
@@ -994,11 +994,11 @@ def _get_timestep():
                                 #self.logger_debug("S1-A: %r <-> S2-D: %r %f A, %f DEG" % (a, h, dist, angle))
                                 frame_results.append(
                                     [h.index + 1, a.index + 1, h.index, a.index,
-                                    '{0!s}{1!s}:{2!s}'.format(h.resname, repr(h.resid), h.name),
-                                    '{0!s}{1!s}:{2!s}'.format(a.resname, repr(a.resid), a.name),
+                                     (h.resname, h.resid, h.name),
+                                     (a.resname, a.resid, a.name),
                                     dist, angle])
 
-            self.timeseries.append(frame_results)
+            self._timeseries.append(frame_results)
 
         logger.info("HBond analysis: complete; timeseries with %d hbonds in %s.timeseries",
                     self.count_by_time().count.sum(), self.__class__.__name__)
@@ -1017,6 +1017,12 @@ def calc_eucl_distance(a1, a2):
         """Calculate the Euclidean distance between two atoms. """
         return norm(a2.position - a1.position)
 
+    @property
+    def timeseries(self):
+        """Time series of hydrogen bonds: one list of bond records per frame (``None`` before run)."""
+        # exposes the private ``_timeseries`` list that the table and count methods read directly
+        return self._timeseries
+
     def generate_table(self):
         """Generate a normalised table of the results.
 
@@ -1042,13 +1048,13 @@ def generate_table(self):
 
         .. _recsql: http://pypi.python.org/pypi/RecSQL
         """
-        if self.timeseries is None:
+        if self._timeseries is None:
             msg = "No timeseries computed, do run() first."
             warnings.warn(msg, category=MissingDataWarning)
             logger.warn(msg)
             return
 
-        num_records = np.sum([len(hframe) for hframe in self.timeseries])
+        num_records = np.sum([len(hframe) for hframe in self._timeseries])
         # build empty output table
         dtype = [
             ("time", float), ("donor_idx", int), ("acceptor_idx", int),
@@ -1060,11 +1066,12 @@ def generate_table(self):
         # and speedups of ~x10 can be achieved by filling a standard array, like this:
         out = np.empty((num_records,), dtype=dtype)
         cursor = 0  # current row
-        for t, hframe in zip(self.timesteps, self.timeseries):
+        for t, hframe in zip(self.timesteps, self._timeseries):
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
             acceptor, distance, angle) in hframe:
+                # donor|acceptor = (resname, resid, atom name)
                 out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \
-                parse_residue(donor) + parse_residue(acceptor) + (distance, angle)
+                donor + acceptor + (distance, angle)
                 cursor += 1
         assert cursor == num_records, "Internal Error: Not all HB records stored"
         self.table = out.view(np.recarray)
@@ -1089,15 +1096,15 @@ def count_by_time(self):
         :Returns: a class:`numpy.recarray`
         """
 
-        if self.timeseries is None:
+        if self._timeseries is None:
             msg = "No timeseries computed, do run() first."
             warnings.warn(msg, category=MissingDataWarning)
             logger.warn(msg)
             return
 
         out = np.empty((len(self.timesteps),), dtype=[('time', float), ('count', int)])
         for cursor, time_count in enumerate(zip(self.timesteps,
-                                               (len(series) for series in self.timeseries))):
+                                               (len(series) for series in self._timeseries))):
             out[cursor] = time_count
         return out.view(np.recarray)
 
@@ -1112,14 +1119,14 @@ def count_by_type(self):
 
         :Returns: a class:`numpy.recarray`
         """
-        if self.timeseries is None:
+        if self._timeseries is None:
             msg = "No timeseries computed, do run() first."
             warnings.warn(msg, category=MissingDataWarning)
             logger.warn(msg)
             return
 
         hbonds = defaultdict(int)
-        for hframe in self.timeseries:
+        for hframe in self._timeseries:
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
                 acceptor, distance, angle) in hframe:
                 donor_resnm, donor_resid, donor_atom = parse_residue(donor)
@@ -1173,14 +1180,14 @@ def timesteps_by_type(self):
         :Returns: a class:`numpy.recarray`
         """
 
-        if self.timeseries is None:
+        if self._timeseries is None:
             msg = "No timeseries computed, do run() first."
             warnings.warn(msg, category=MissingDataWarning)
             logger.warn(msg)
             return
 
         hbonds = defaultdict(list)
-        for (t, hframe) in zip(self.timesteps, self.timeseries):
+        for (t, hframe) in zip(self.timesteps, self._timeseries):
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
             acceptor, distance, angle) in hframe:
                 donor_resnm, donor_resid, donor_atom = parse_residue(donor)

diff --git a/testsuite/MDAnalysisTests/analysis/test_hbonds.py b/testsuite/MDAnalysisTests/analysis/test_hbonds.py
@@ -26,14 +26,17 @@
 from MDAnalysis import SelectionError, SelectionWarning
 
 from numpy.testing import (assert_, assert_equal, assert_array_equal,
-                           assert_raises)
+                           assert_almost_equal, assert_array_almost_equal,
+                           assert_raises, dec)
 import numpy as np
 
 import itertools
 import warnings
 from six import StringIO
 
-from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC
+from MDAnalysisTests import parser_not_found
+from MDAnalysisTests.datafiles import PDB_helix, GRO, XTC, waterPSF, waterDCD
+
 # For type guessing:
 from MDAnalysis.topology.core import guess_atom_type
 from MDAnalysis.core.topologyattrs import Atomtypes
@@ -226,3 +229,92 @@ def run_HBA_dynamic_selections(*args):
                 yield run_HBA_dynamic_selections, s1, s2, s1type
         finally:
             self._tearDown()
+
+
+class TestHydrogenBondAnalysisTIP3P(object):
+    """Check timeseries and table of an all-to-all analysis of TIP3P water (Issue #801)."""
+
+    @dec.skipif(parser_not_found('DCD'),
+                'DCD parser not available. Are you using python 3?')
+    def setUp(self):
+        """Run the analysis once and store results, reference values and column maps."""
+        self.universe = MDAnalysis.Universe(waterPSF, waterDCD)
+        self.kwargs = {
+            'selection1': 'all',
+            'selection2': 'all',
+            'detect_hydrogens': "distance",
+            'distance': 3.0,
+            'angle': 120.0,
+        }
+        self.h = MDAnalysis.analysis.hbonds.HydrogenBondAnalysis(self.universe, **self.kwargs)
+        self.h.run(verbose=False)
+        self.h.generate_table()
+        self.normalized_timeseries = self._normalize_timeseries()
+
+        # keys are the names in the h.table
+        self.reference = {
+            'distance': {'mean': 2.0208776, 'std': 0.31740859},
+            'angle': {'mean': 155.13521, 'std': 12.98955},
+        }
+
+        # reference values for the table only
+        self.reference_table = {
+            'donor_resnm': ["TIP3"] * len(self.normalized_timeseries),
+            'acceptor_resnm': ["TIP3"] * len(self.normalized_timeseries),
+        }
+
+        # index into timeseries (ADJUST ONCE donor_idx and acceptor_idx are removed)
+        # with keys being field names in h.table
+        self.columns = {
+            'time': 0,
+            'donor_idx': 1,
+            'acceptor_idx': 2,
+            'donor_index': 3,
+            'acceptor_index': 4,
+            'distance': 7,
+            'angle': 8,
+        }
+
+        # hackish way to allow looping over self.reference and generating tests
+        self._functions = {'mean': np.mean, 'std': np.std}
+
+    def _normalize_timeseries(self):
+        """Return the timeseries flattened to one row per hydrogen bond, time first."""
+        # timeseries in normalized form: (t, d_indx1, a_indx1, d_index0, a_index0, donor, acceptor, dist, angle)
+        #                   array index:  0     1        2        3         4        5      6        7      8
+        return [[t] + item
+                for t, hframe in zip(self.h.timesteps, self.h.timeseries)
+                for item in hframe]
+
+    def test_timeseries(self):
+        """Frame count, bond count and distance/angle statistics match the reference."""
+        assert_equal(len(self.h.timeseries), 10)
+        assert_equal(len(self.normalized_timeseries), 29)
+
+        for observable in self.reference:
+            idx = self.columns[observable]
+            for quantity, reference in self.reference[observable].items():
+                values = [item[idx] for item in self.normalized_timeseries]
+                assert_almost_equal(
+                    self._functions[quantity](values), reference, decimal=5,
+                    err_msg="{0}({1}) does not match reference".format(quantity, observable))
+
+    def test_table_atoms(self):
+        """The table agrees with the timeseries and reports TIP3 residue names."""
+        table = self.h.table
+
+        assert_equal(len(table), len(self.normalized_timeseries))
+
+        # test that timeseries and table agree on index data and
+        # hydrogen bond information at atom level
+        for name, idx in self.columns.items():
+            assert_array_almost_equal(
+                table.field(name), [row[idx] for row in self.normalized_timeseries],
+                err_msg="table[{0}] and timeseries[{1}] do not agree".format(name, idx))
+
+        # test at residue level (issue #801,
+        # https://github.com/MDAnalysis/mdanalysis/issues/801)
+        for name, ref in self.reference_table.items():
+            assert_array_equal(table.field(name), ref,
+                               err_msg="resname for {0} do not match (Issue #801)".format(name))
+