Merge branch 'develop' into densityanalysis-2502

siddharthjain1611 · Mar 19, 2020 · 1c2e943 · 1c2e943
2 parents 1bfb9a8 + a78307f
commit 1c2e943
Show file tree

Hide file tree

Showing 10 changed files with 237 additions and 67 deletions.
diff --git a/package/AUTHORS b/package/AUTHORS
@@ -138,6 +138,7 @@ Chronological list of authors
   - Yuxuan Zhuang
   - Abhishek Shandilya
   - Morgan L. Nance
+  - Faraaz Shah
   - Wiep van der Toorn
 
 

diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -16,12 +16,13 @@ The rules for this file:
 mm/dd/yy richardjgowers, kain88-de, lilyminium, p-j-smith, bdice, joaomcteixeira,
          PicoCentauri, davidercruz, jbarnoud, RMeli, IAlibay, mtiberti, CCook96,
          Yuan-Yu, xiki-tempula, HTian1997, Iv-Hristov, hmacdope, AnshulAngaria,
-         ss62171, Luthaf, yuxuanzhuang, abhishandy, mlnance, orbeckst,
+         ss62171, Luthaf, yuxuanzhuang, abhishandy, mlnance, shfrz, orbeckst,
          wvandertoorn
 
   * 0.21.0
 
 Fixes
+  * Updated tests to have explicit fixtures (Issue #2618)
   * XDR offsets now read from trajectory if offsets file read-in fails on
     IOError (Issue #1893, PR #2611)
   * Fixed the deprecation warning from `collections` library in `flatten_dict`
@@ -69,6 +70,8 @@ Fixes
     match TPRParser. (PR #2408)
   * Added parmed to setup.py
   * Fixed example docs for polymer persistence length (#2582)
+  * Fixed HydrogenBondAnalysis to return atom indices rather than atom ids (PR #2572).
+    Fixed the check for bond information in the _get_dh_pairs method (Issue #2396).
   * Added missing selection module to leaflet.py (Issue #2612)
 
 Enhancements
@@ -108,6 +111,7 @@ Enhancements
   * Improve the distance search in water bridge analysis with capped_distance (PR #2480)
 
 Changes
+  * Removed `details` from `ClusteringMethod`s (Issue #2575, PR #2620)
   * Removed deprecated :meth:`PersistenceLength.perform_fit` (Issue #2596)
   * Changed :meth:`PSAnalysis.generate_paths` keywords `store` and `filename`
     defaults to `False` and `None` (Issue #2593)

diff --git a/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py b/package/MDAnalysis/analysis/encore/clustering/ClusteringMethod.py
@@ -84,10 +84,11 @@ def __call__(self, x):
             encore.utils.TriangularMatrix, encoding the conformational
             distance matrix
 
-        Returns
-        -------
-        numpy.array
-            list of cluster indices
+        Raises
+        ------
+        NotImplementedError
+           Method or behavior needs to be defined by a subclass
+
         """
         raise NotImplementedError("Class {0} doesn't implement __call__()"
                                   .format(self.__class__.__name__))
@@ -146,8 +147,11 @@ def __call__(self, distance_matrix):
 
         Returns
         -------
-        numpy.array
-            list of cluster indices
+        numpy.array : array, shape(n_elements) 
+            centroid frames of the clusters for all of the elements
+
+        .. versionchanged:: 1.0.0
+           This method no longer returns ``details``
         """
         clusters = affinityprop.AffinityPropagation(
             s=distance_matrix * -1.,   # invert sign
@@ -156,9 +160,8 @@ def __call__(self, distance_matrix):
             max_iterations = self.max_iter,
             convergence = self.convergence_iter,
             noise=int(self.add_noise))
-        details = {}
-        return clusters, details
-
+
+        return clusters
 if sklearn:
 
     class AffinityPropagation(ClusteringMethod):
@@ -214,9 +217,11 @@ def __call__(self, distance_matrix):
 
             Returns
             -------
-            numpy.array
-                list of cluster indices
+            numpy.array : array, shape(n_elements) 
+                centroid frames of the clusters for all of the elements
 
+            .. versionchanged:: 1.0.0
+               This method no longer returns ``details``
             """
             logging.info("Starting Affinity Propagation: {0}".format
                          (self.ap.get_params()))
@@ -226,8 +231,9 @@ def __call__(self, distance_matrix):
             clusters = self.ap.fit_predict(similarity_matrix)
             clusters = encode_centroid_info(clusters,
                                             self.ap.cluster_centers_indices_)
-            details = {}
-            return clusters, details
+
+            return clusters
+
 
 
     class DBSCAN(ClusteringMethod):
@@ -290,9 +296,11 @@ def __call__(self, distance_matrix):
 
             Returns
             -------
-            numpy.array
-                list of cluster indices
+            numpy.array : array, shape(n_elements) 
+                centroid frames of the clusters for all of the elements
 
+            .. versionchanged:: 1.0.0
+               This method no longer returns ``details``
             """
             logging.info("Starting DBSCAN: {0}".format(
                 self.dbscan.get_params()))
@@ -304,8 +312,8 @@ def __call__(self, distance_matrix):
             cluster_representatives = np.unique(clusters, return_index=True)[1]
             clusters = encode_centroid_info(clusters,
                                             cluster_representatives)
-            details = {}
-            return clusters, details
+
+            return clusters
 
     class KMeans(ClusteringMethod):
 
@@ -414,8 +422,11 @@ def __call__(self, coordinates):
 
             Returns
             -------
-            numpy.array
-                list of cluster indices
+            numpy.array : array, shape(n_elements) 
+                centroid frames of the clusters for all of the elements
+
+            .. versionchanged:: 1.0.0
+               This method no longer returns ``details``
             """
             logging.info("Starting Kmeans: {0}".format(
                          (self.kmeans.get_params())))
@@ -424,5 +435,5 @@ def __call__(self, coordinates):
             cluster_center_indices = np.argmin(distances, axis=0)
             clusters = encode_centroid_info(clusters,
                                              cluster_center_indices)
-            details = {}
-            return clusters, details
+
+            return clusters
diff --git a/package/MDAnalysis/analysis/encore/clustering/cluster.py b/package/MDAnalysis/analysis/encore/clustering/cluster.py
@@ -231,7 +231,7 @@ def cluster(ensembles,
 
     # Create clusters collections from clustering results,
     # one for each cluster. None if clustering didn't work.
-    ccs = [ClusterCollection(clusters[1][0],
+    ccs = [ClusterCollection(clusters[1],
                              metadata=metadata) for clusters in results]
 
     if allow_collapsed_result and len(ccs) == 1:

diff --git a/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py b/package/MDAnalysis/analysis/hydrogenbonds/hbond_analysis.py
@@ -165,14 +165,14 @@
 
 .. autoclass:: HydrogenBondAnalysis
    :members:
-
 """
 from __future__ import absolute_import, division
 
 import numpy as np
 
 from .. import base
 from MDAnalysis.lib.distances import capped_distance, calc_angles
+from MDAnalysis.exceptions import NoDataError
 
 from ...due import due, Doi
 
@@ -409,9 +409,12 @@ def _get_dh_pairs(self):
         # If donors_sel is not provided, use topology to find d-h pairs
         if not self.donors_sel:
 
-            if len(self.u.bonds) == 0:
-                raise Exception('Cannot assign donor-hydrogen pairs via topology as no bonded information is present. '
-                                'Please either: load a topology file with bonded information; use the guess_bonds() '
+            # We're using u._topology.bonds rather than u.bonds as it is far faster to access.
+            # This is because u.bonds also calculates properties of each bond (e.g. bond length).
+            # See https://github.com/MDAnalysis/mdanalysis/issues/2396#issuecomment-596251787
+            if not (hasattr(self.u._topology, 'bonds') and len(self.u._topology.bonds.values) != 0):
+                raise NoDataError('Cannot assign donor-hydrogen pairs via topology as no bond information is present. '
+                                'Please either: load a topology file with bond information; use the guess_bonds() '
                                 'topology guesser; or set HydrogenBondAnalysis.donors_sel so that a distance cutoff '
                                 'can be used.')
 
@@ -496,9 +499,9 @@ def _single_frame(self):
 
         # Store data on hydrogen bonds found at this frame
         self.hbonds[0].extend(np.full_like(hbond_donors, self._ts.frame))
-        self.hbonds[1].extend(hbond_donors.ids)
-        self.hbonds[2].extend(hbond_hydrogens.ids)
-        self.hbonds[3].extend(hbond_acceptors.ids)
+        self.hbonds[1].extend(hbond_donors.indices)
+        self.hbonds[2].extend(hbond_hydrogens.indices)
+        self.hbonds[3].extend(hbond_acceptors.indices)
         self.hbonds[4].extend(hbond_distances)
         self.hbonds[5].extend(hbond_angles)
 

diff --git a/testsuite/MDAnalysisTests/analysis/test_encore.py b/testsuite/MDAnalysisTests/analysis/test_encore.py
@@ -462,7 +462,7 @@ def test_clustering_two_methods(self, ens1):
     def test_clustering_AffinityPropagationNative_direct(self, ens1):
         method = encore.AffinityPropagationNative()
         distance_matrix = encore.get_distance_matrix(ens1)
-        cluster_assignment, details = method(distance_matrix)
+        cluster_assignment = method(distance_matrix)
         expected_value = 7
         assert len(set(cluster_assignment)) == expected_value, \
                      "Unexpected result: {0}".format(cluster_assignment)
@@ -471,7 +471,7 @@ def test_clustering_AffinityPropagation_direct(self, ens1):
         pytest.importorskip('sklearn')
         method = encore.AffinityPropagation()
         distance_matrix = encore.get_distance_matrix(ens1)
-        cluster_assignment, details = method(distance_matrix)
+        cluster_assignment = method(distance_matrix)
         expected_value = 7
         assert len(set(cluster_assignment)) == expected_value, \
                      "Unexpected result: {0}".format(cluster_assignment)
@@ -483,15 +483,15 @@ def test_clustering_KMeans_direct(self, ens1):
         coordinates = ens1.trajectory.timeseries(order='fac')
         coordinates = np.reshape(coordinates,
                                  (coordinates.shape[0], -1))
-        cluster_assignment, details = method(coordinates)
+        cluster_assignment = method(coordinates)
         assert len(set(cluster_assignment)) == clusters, \
                      "Unexpected result: {0}".format(cluster_assignment)
 
     def test_clustering_DBSCAN_direct(self, ens1):
         pytest.importorskip('sklearn')
         method = encore.DBSCAN(eps=0.5, min_samples=2)
         distance_matrix = encore.get_distance_matrix(ens1)
-        cluster_assignment, details = method(distance_matrix)
+        cluster_assignment = method(distance_matrix)
         expected_value = 2
         assert len(set(cluster_assignment)) == expected_value, \
                      "Unexpected result: {0}".format(cluster_assignment)

diff --git a/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py b/testsuite/MDAnalysisTests/analysis/test_hydrogenbonds_analysis.py
@@ -27,9 +27,11 @@
 import numpy as np
 import MDAnalysis
 from MDAnalysis.analysis.hydrogenbonds.hbond_analysis import HydrogenBondAnalysis
+from MDAnalysis.exceptions import NoDataError
 
 import pytest
-from numpy.testing import assert_allclose, assert_array_almost_equal, assert_array_equal
+from numpy.testing import assert_allclose, assert_equal, assert_array_almost_equal, assert_array_equal, \
+    assert_almost_equal
 from MDAnalysisTests.datafiles import waterPSF, waterDCD
 
 
@@ -87,15 +89,125 @@ def test_count_by_type(self, h):
         counts = h.count_by_type()
         assert int(counts[0, 2]) == ref_count
 
-    def test_count_by_ids(self, h):
+    def test_count_by_ids(self, h, universe):
 
         ref_counts = [1.0, 1.0, 0.5, 0.4, 0.2, 0.1]
         unique_hbonds = h.count_by_ids()
 
+        most_common_hbond_ids = [12, 14, 9]
+        assert_equal(unique_hbonds[0,:3], most_common_hbond_ids)
+
         # count_by_ids() returns raw counts
         # convert to fraction of time that bond was observed
         counts = unique_hbonds[:, 3] / len(h.timesteps)
 
+        assert_allclose(counts, ref_counts)
+
+
+class TestHydrogenBondAnalysisMock(object):
+
+    kwargs = {
+        'donors_sel': 'name O',
+        'hydrogens_sel': 'name H1 H2',
+        'acceptors_sel': 'name O',
+        'd_h_cutoff': 1.2,
+        'd_a_cutoff': 3.0,
+        'd_h_a_angle_cutoff': 120.0
+    }
+
+    @staticmethod
+    @pytest.fixture(scope='class')
+    def universe():
+        # create two water molecules
+        """
+                       H4
+                        \
+            O1-H2 .... O2-H3
+           /
+          H1
+        """
+        n_residues = 2
+        u = MDAnalysis.Universe.empty(
+            n_atoms=n_residues*3,
+            n_residues=n_residues,
+            atom_resindex=np.repeat(range(n_residues), 3),
+            residue_segindex=[0] * n_residues,
+            trajectory=True,  # necessary for adding coordinates
+            )
+
+        u.add_TopologyAttr('name', ['O', 'H1', 'H2'] * n_residues)
+        u.add_TopologyAttr('type', ['O', 'H', 'H'] * n_residues)
+        u.add_TopologyAttr('resname', ['SOL'] * n_residues)
+        u.add_TopologyAttr('resid', list(range(1, n_residues + 1)))
+        u.add_TopologyAttr('id', list(range(1, (n_residues * 3) + 1)))
+
+        # Atomic coordinates with a single hydrogen bond between O1-H2---O2
+        pos1 = np.array([[0, 0, 0],             # O1
+                        [-0.249, -0.968, 0],    # H1
+                        [1, 0, 0],              # H2
+                        [2.5, 0, 0],            # O2
+                        [3., 0, 0],             # H3
+                        [2.250, 0.968, 0]       # H4
+                        ])
+
+        # Atomic coordinates with no hydrogen bonds
+        pos2 = np.array([[0, 0, 0],             # O1
+                         [-0.249, -0.968, 0],   # H1
+                         [1, 0, 0],             # H2
+                         [4.5, 0, 0],           # O2
+                         [5., 0, 0],            # H3
+                         [4.250, 0.968, 0]      # H4
+                         ])
+
+        coordinates = np.empty((3,  # number of frames
+                                u.atoms.n_atoms,
+                                3))
+        coordinates[0] = pos1
+        coordinates[1] = pos2
+        coordinates[2] = pos1
+        u.load_new(coordinates, order='fac')
+
+        return u
+
+    def test_no_bond_info_exception(self, universe):
+
+        kwargs = {
+            'donors_sel': None,
+            'hydrogens_sel': None,
+            'acceptors_sel': None,
+            'd_h_cutoff': 1.2,
+            'd_a_cutoff': 3.0,
+            'd_h_a_angle_cutoff': 120.0
+        }
+
+        with pytest.raises(NoDataError, match="no bond information"):
+            h = HydrogenBondAnalysis(universe, **kwargs)
+            h._get_dh_pairs()
+
+    def test_first(self, universe):
+
+        h = HydrogenBondAnalysis(universe, **self.kwargs)
+        h.run()
+
+        assert len(h.hbonds) == 2
+
+        frame_no, donor_index, hydrogen_index, acceptor_index, da_dst, dha_angle = h.hbonds[0]
+        assert_equal(donor_index, 0)
+        assert_equal(hydrogen_index, 2)
+        assert_equal(acceptor_index, 3)
+        assert_almost_equal(da_dst, 2.5)
+        assert_almost_equal(dha_angle, 180)
+
+    def test_count_by_time(self, universe):
+
+        h = HydrogenBondAnalysis(universe, **self.kwargs)
+        h.run()
+
+        ref_times = np.array([0, 1, 2]) # u.trajectory.dt is 1
+        ref_counts = np.array([1, 0, 1])
+
+        counts = h.count_by_time()
+        assert_array_almost_equal(h.timesteps, ref_times)
         assert_array_equal(counts, ref_counts)
 
 
@@ -112,7 +224,6 @@ class TestHydrogenBondAnalysisTIP3P_GuessAcceptors_GuessHydrogens_UseTopology_(T
         'd_h_a_angle_cutoff': 120.0
     }
 
-
 class TestHydrogenBondAnalysisTIP3P_GuessDonors_NoTopology(object):
     """Guess the donor atoms involved in hydrogen bonds using the partial charges of the atoms.
     """