From ba47e1fdf87f2ab80d1e54159b970dee7d420ba6 Mon Sep 17 00:00:00 2001
From: Oliver Beckstein <orbeckst@gmail.com>
Date: Fri, 12 May 2017 15:56:48 -0700
Subject: [PATCH] hbond analysis: addressed review comments

---
 .../analysis/hbonds/hbond_analysis.py         | 134 ++++++++++++------
 1 file changed, 89 insertions(+), 45 deletions(-)
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
index 38a6729dbc2..7aa82072ea3 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_analysis.py
@@ -92,8 +92,9 @@
    from the output one would use ``u.atoms[index]``.
 
    .. deprecated:: 0.15.0
-      The 1-based indices are being deprecated in favor of zero-based indices
-      and are targeted for removal in 0.17.0.
+      The 1-based indices "donor_idx" and "acceptor_idx" are being deprecated
+      in favor of zero-based indices "donor_index" and "acceptor_index" and are
+      targeted for removal in 0.17.0.
 
 
 Using the :meth:`HydrogenBondAnalysis.generate_table` method one can reformat
@@ -321,17 +322,20 @@ class HydrogenBondAnalysis_OtherFF(HydrogenBondAnalysis):
 
       .. Note::
 
-         Each index variable named *idx* is a 1-based index. To get the :attr:`Atom.index` (the
-         0-based index typically used in MDAnalysis) simply subtract 1, or better, use the 0-based variables named *index*.
+         The index variables *donor_idx* and *acceptor_idx* are 1-based
+         indices. To get the :attr:`Atom.index` (the 0-based index typically
+         used in MDAnalysis) simply subtract 1, or better, use the 0-based
+         variables named *donor_index* and *acceptor_index*.
+
+         For instance, to find an acceptor atom in :attr:`Universe.atoms` by
+         *index* one would use ``u.atoms[acceptor_index]``.
 
-         For instance, to find an acceptor atom in :attr:`Universe.atoms` by *index* one
-         would use ``u.atoms[acceptor_index]``.
 
       .. deprecated:: 0.15.0
-         The donor and acceptor indices being 1-based is deprecated in favor of
-         a zero-based index. The 0-based indices can be accessed by
-         *donor_index* or "acceptor_index"; removal of the 1-based indices is
-         targeted for version 0.17.0
+         The 1-based donor and acceptor indices (*donor_idx* and
+         *acceptor_idx*) are deprecated in favor of 0-based indices. The
+         0-based indices can be accessed by *donor_index* and *acceptor_index*;
+         removal of the 1-based indices is targeted for version 0.17.0
 
    .. automethod:: _get_bonded_hydrogens
 
@@ -585,6 +589,29 @@ def __init__(self, universe, selection1='protein', selection2='all', selection1_
              " of a zero-based index. These can be accessed by 'donor_index' or"
              " 'acceptor_index', removal of the 1-based indices is targeted for"
              " version 0.17.0", category=DeprecationWarning)
+        # REMOVAL (DEPRECATION) instructions for 0.17.0: "1-based indices" (aka "idx" vs 0-based "index")
+        # - remove the warning
+        # - replace 'deprecated 0.15.0' with 'versionchanged 0.17.0' (and adjust text)
+        # - update docs for "timeseries" by removing all "idx (1-based)" entries:
+        #   one hydrogen bond should now look like
+        #      <donor index (0-based)>, <acceptor index (0-based)>, <donor string>, <acceptor string>,
+        #      <distance>, <angle>
+        # - update docs for "table": removed *_idx and renumber
+        # - in run(): removed the 1-based indices (h.index + 1, a.index + 1) from frame_results:
+        #   for instance, should now read (two occurences!)
+        #        frame_results.append(
+        #                            [h.index, a.index,
+        #                             (h.resname, h.resid, h.name),
+        #                             (a.resname, a.resid, a.name),
+        #                              dist, angle])
+        # - _reformat_hb(): change indices hb[:4] --> hb[:2]
+        # - generate_table(), count_*(), timesteps_by_type():
+        #   remove any donor_idx, acceptor_idx variables and fields.
+        # - update tests (change indices in analysis/test_hbonds.py)
+        # - update any docs in this file that mention "1-based"
+        #
+        # I suggest to keep this removal a separate commit. [@orbeckst]
+
 
         self._get_bonded_hydrogens_algorithms = {
             "distance": self._get_bonded_hydrogens_dist,  # 0.7.6 default
@@ -900,7 +927,7 @@ def run(self, **kwargs):
              [``True``]
         debug : bool (optional)
              enable detailed logging of debugging information; this can create
-             *very big* log files so it is disable (``False``) by default; setting
+             *very big* log files so it is disabled (``False``) by default; setting
              `debug` toggles the debug status for :class:`HydrogenBondAnalysis`,
              namely the value of :attr:`HydrogenBondAnalysis.debug`.
 
@@ -926,12 +953,6 @@ def run(self, **kwargs):
            Accept `quiet` keyword. Analysis will now proceed through frames even if
            no donors or acceptors were found in a particular frame.
 
-        .. deprecated:: 0.15.0
-           The donor and acceptor indices being 1-based is deprecated in favor
-           of a zero-based index. This can be accessed by "donor_index" or
-           "acceptor_index" removal of the 1-based indices is targeted
-           for version 0.17.0
-
         .. deprecated:: 0.16
            The `quiet` keyword argument is deprecated in favor of the `verbose`
            one. Previous use of `verbose` now corresponds to the new keyword
@@ -1111,10 +1132,11 @@ def timeseries(self):
 
         Note
         ----
-        Each index variable named *idx* is a 1-based index. To get the
-        :attr:`Atom.index` (the 0-based index typically used in MDAnalysis)
-        simply subtract 1, or better, use the 0-based variables named
-        *index*.
+        The index variables named *donor_idx* and *acceptor_idx* are 1-based
+        indices, which were used historically. To get the :attr:`Atom.index`
+        (the 0-based index typically used in MDAnalysis) simply subtract 1, or
+        better, use the 0-based variables named *donor_index* and
+        *acceptor_index*.
 
         For instance, to find an acceptor atom in :attr:`Universe.atoms` by
         *index* one would use ``u.atoms[acceptor_index]``.
@@ -1141,8 +1163,9 @@ def timeseries(self):
            acceptor atom specifiers as tuples `(resname, resid, atomid)` instead of strings.
 
         .. deprecated:: 0.15.0
-           The 1-based indices are being deprecated in favor of zero-based indices
-           and they are targeted for removal in 0.17.0.
+           The 1-based indices "donor_idx" and "acceptor_idx" are being
+           deprecated in favor of the 0-based indices "donor_index" and
+           "acceptor_index" and they are targeted for removal in 0.17.0.
 
         """
         return [[self._reformat_hb(hb) for hb in hframe] for hframe in self._timeseries]
@@ -1155,9 +1178,14 @@ def _reformat_hb(hb, atomformat="{0[0]!s}{0[1]!s}:{0[2]!s}"):
         resid, atomid). In 0.16.0 and earlier they were stored as a string.
 
         .. deprecated:: 1.0
+           This is a compatibility layer so that we can provide the same output
+           in timeseries as before. However, for 1.0 we should make timeseries
+           just return _timeseries, i.e., change the format of timeseries to
+           the un-ambiguous representation provided in _timeseries.
 
         """
-        # change indices once we remove 1-based donor_idx and acceptor_idx
+        # change indices once we remove 1-based donor_idx and acceptor_idx:
+        # change indices hb[:4]... --> hb[:2], hb[2], hb[3], hb[4:]
         return (hb[:4]
                 + [atomformat.format(hb[4]), atomformat.format(hb[5])]
                 + hb[6:])
@@ -1193,7 +1221,7 @@ def generate_table(self):
         cursor = 0  # current row
         for t, hframe in zip(self.timesteps, self._timeseries):
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
-            acceptor, distance, angle) in hframe:
+                 acceptor, distance, angle) in hframe:
                 # donor|acceptor = (resname, resid, atomid)
                 out[cursor] = (t, donor_idx, acceptor_idx, donor_index, acceptor_index) + \
                 donor + acceptor + (distance, angle)
@@ -1226,7 +1254,7 @@ def save_table(self, filename="hbond_table.pickle"):
         cPickle.dump(self.table, open(filename, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
 
     def _has_timeseries(self):
-        has_timeseries = (self._timeseries is not None)
+        has_timeseries = self._timeseries is not None
         if not has_timeseries:
             msg = "No timeseries computed, do run() first."
             warnings.warn(msg, category=MissingDataWarning)
@@ -1236,9 +1264,17 @@ def _has_timeseries(self):
     def count_by_time(self):
         """Counts the number of hydrogen bonds per timestep.
 
+        Processes :attr:`HydrogenBondAnalysis._timeseries` into the time series
+        ``N(t)`` where ``N`` is the total number of observed hydrogen bonds at
+        time ``t``.
+
         Returns
         -------
-        numpy.recarray
+        counts : numpy.recarray
+             The resulting array can be thought of as rows ``(time, N)`` where
+             ``time`` is the time (in ps) of the time step and ``N`` is the
+             total number of hydrogen bonds.
+
         """
         if not self._has_timeseries():
             return
@@ -1252,15 +1288,23 @@ def count_by_time(self):
     def count_by_type(self):
         """Counts the frequency of hydrogen bonds of a specific type.
 
-        Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a
+        Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a
         :class:`numpy.recarray` containing atom indices, residue names, residue
         numbers (for donors and acceptors) and the fraction of the total time
         during which the hydrogen bond was detected.
 
         Returns
         -------
-        numpy.recarray
+        counts : numpy.recarray
+             Each row of the array contains data to define a unique hydrogen
+             bond together with the frequency (fraction of the total time) that
+             it has been observed.
+
 
+        .. deprecated:: 0.15.0
+           The 1-based indices "donor_idx" and "acceptor_idx" are being
+           deprecated in favor of zero-based indices and they are targeted for
+           removal in 0.17.0.
         """
         if not self._has_timeseries():
             return
@@ -1268,7 +1312,7 @@ def count_by_type(self):
         hbonds = defaultdict(int)
         for hframe in self._timeseries:
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
-                acceptor, distance, angle) in hframe:
+                 acceptor, distance, angle) in hframe:
                 donor_resnm, donor_resid, donor_atom = donor
                 acceptor_resnm, acceptor_resid, acceptor_atom = acceptor
                 # generate unambigous key for current hbond \
@@ -1312,14 +1356,25 @@ def count_by_type(self):
     def timesteps_by_type(self):
         """Frames during which each hydrogen bond existed, sorted by hydrogen bond.
 
-        Processes :attr:`HydrogenBondAnalysis.timeseries` and returns a
+        Processes :attr:`HydrogenBondAnalysis._timeseries` and returns a
         :class:`numpy.recarray` containing atom indices, residue names, residue
-        numbers (for donors and acceptors) and a list of timesteps at which the
+        numbers (for donors and acceptors) and each timestep at which the
         hydrogen bond was detected.
 
+        In principle, this is the same as :attr:`~HydrogenBondAnalysis.table`
+        but sorted by hydrogen bond and with additional data for the
+        *donor_heavy_atom* and angle and distance omitted.
+
+
         Returns
         -------
-        numpy.recarray
+        data : numpy.recarray
+
+
+        .. deprecated:: 0.15.0
+           The 1-based indices "donor_idx" and "acceptor_idx" are being
+           deprecated in favor of zero-based indices and they are targeted for
+           removal in 0.17.0.
 
         """
         if not self._has_timeseries():
@@ -1328,7 +1383,7 @@ def timesteps_by_type(self):
         hbonds = defaultdict(list)
         for (t, hframe) in zip(self.timesteps, self._timeseries):
             for (donor_idx, acceptor_idx, donor_index, acceptor_index, donor,
-            acceptor, distance, angle) in hframe:
+                 acceptor, distance, angle) in hframe:
                 donor_resnm, donor_resid, donor_atom = donor
                 acceptor_resnm, acceptor_resid, acceptor_atom = acceptor
                 # generate unambigous key for current hbond
@@ -1425,17 +1480,6 @@ def _donor_lookup_table_byindex(self):
 
            heavy_atom_name = h2donor[index]
 
-        Note
-        ----
-        *index* is the 0-based MDAnalysis index
-        (:attr:`MDAnalysis.core.groups.Atom.index`).  The
-        tables generated by :class:`HydrogenBondAnalysis` contain
-        1-based indices and zero-based indices.
-
-
-        .. deprecated:: 0.15.0
-            The 1-based indices are deprecated in favor of the zero-based indices
-            given by "idx_zero".
         """
         s1d = self._s1_donors  # list of donor Atom instances
         s1h = self._s1_donors_h  # dict indexed by donor position in donor list, containg AtomGroups of H