diff --git a/package/MDAnalysis/analysis/distances.py b/package/MDAnalysis/analysis/distances.py
index ae4ad699141..d524a993625 100644
--- a/package/MDAnalysis/analysis/distances.py
+++ b/package/MDAnalysis/analysis/distances.py
@@ -42,6 +42,7 @@
            'contact_matrix', 'dist', 'between']
 
 import numpy as np
+import scipy.sparse
 
 from MDAnalysis.lib.distances import distance_array, self_distance_array
 from MDAnalysis.lib.c_distances import contact_matrix_no_pbc, contact_matrix_pbc
@@ -51,15 +52,6 @@
 import logging
 logger = logging.getLogger("MDAnalysis.analysis.distances")
 
-try:
-   from scipy import sparse
-except ImportError:
-   sparse = None
-   msg = "scipy.sparse could not be imported: some functionality will " \
-         "not be available in contact_matrix()"
-   warnings.warn(msg, category=ImportWarning)
-   logger.warn(msg)
-   del msg
 
 def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
     '''Calculates a matrix of contacts.
@@ -93,12 +85,6 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
        The contact matrix is returned in a format determined by the `returntype`
        keyword.
 
-
-    Note
-    ----
-    :mod:`scipy.sparse` is require for using *sparse* matrices; if it cannot
-    be imported then an `ImportError` is raised.
-
     See Also
     --------
     :mod:`MDAnalysis.analysis.contacts` for native contact analysis
@@ -112,14 +98,9 @@ def contact_matrix(coord, cutoff=15.0, returntype="numpy", box=None):
         adj = (distance_array(coord, coord, box=box) < cutoff)
         return adj
     elif returntype == "sparse":
-        if sparse is None:
-            # hack: if we are running with minimal dependencies then scipy was
-            #       not imported and we have to bail here (see scipy import at top)
-            raise ImportError("For sparse matrix functionality you need to "
-                              "import scipy.")
         # Initialize square List of Lists matrix of dimensions equal to number
         # of coordinates passed
-        sparse_contacts = sparse.lil_matrix((len(coord), len(coord)), dtype='bool')
+        sparse_contacts = scipy.sparse.lil_matrix((len(coord), len(coord)), dtype='bool')
         if box is not None:
             # with PBC
             contact_matrix_pbc(coord, sparse_contacts, box, cutoff)
diff --git a/package/MDAnalysis/analysis/encore/similarity.py b/package/MDAnalysis/analysis/encore/similarity.py
index bd7e9e5e0ea..a56b3c0c1e5 100644
--- a/package/MDAnalysis/analysis/encore/similarity.py
+++ b/package/MDAnalysis/analysis/encore/similarity.py
@@ -172,21 +172,13 @@
 from __future__ import print_function, division, absolute_import
 from six.moves import range, zip
 
-import MDAnalysis as mda
-import numpy as np
 import warnings
 import logging
-try:
-    from scipy.stats import gaussian_kde
-except ImportError:
-    gaussian_kde = None
-    msg = "scipy.stats.gaussian_kde could not be imported. " \
-          "Dimensionality reduction ensemble comparisons will not " \
-          "be available."
-    warnings.warn(msg,
-                  category=ImportWarning)
-    logging.warn(msg)
-    del msg
+
+import numpy as np
+import scipy.stats
+
+import MDAnalysis as mda
 
 from ...coordinates.memory import MemoryReader
 from .confdistmatrix import get_distance_matrix
@@ -460,18 +452,11 @@ def gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
     embedded_ensembles = []
     resamples = []
 
-    if gaussian_kde is None:
-        # hack: if we are running with minimal dependencies then scipy was
-        # not imported and we have to bail here (see scipy import at top)
-        raise ImportError("For Kernel Density Estimation functionality you"
-                          "need to import scipy")
-
     for i in range(1, nensembles + 1):
         this_embedded = embedded_space.transpose()[
             np.where(np.array(ensemble_assignment) == i)].transpose()
         embedded_ensembles.append(this_embedded)
-        kdes.append(gaussian_kde(
-            this_embedded))
+        kdes.append(scipy.stats.gaussian_kde(this_embedded))
 
     # # Set number of samples
     # if not nsamples:
@@ -623,12 +608,6 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
 
     """
 
-    if gaussian_kde is None:
-        # hack: if we are running with minimal dependencies then scipy was
-        # not imported and we have to bail here (see scipy import at top)
-        raise ImportError("For Kernel Density Estimation functionality you"
-                          "need to import scipy")
-
     kdes = []
     embedded_ensembles = []
     resamples = []
@@ -639,8 +618,7 @@ def cumulative_gen_kde_pdfs(embedded_space, ensemble_assignment, nensembles,
             np.logical_and(ensemble_assignment >= ens_id_min,
                               ensemble_assignment <= i))].transpose()
         embedded_ensembles.append(this_embedded)
-        kdes.append(
-            gaussian_kde(this_embedded))
+        kdes.append(scipy.stats.gaussian_kde(this_embedded))
 
     # Resample according to probability distributions
     for this_kde in kdes:
diff --git a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
index a0bf6bb5451..bf81e3529c5 100644
--- a/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
+++ b/package/MDAnalysis/analysis/hbonds/hbond_autocorrel.py
@@ -155,6 +155,8 @@
 from __future__ import division, absolute_import
 from six.moves import zip
 import numpy as np
+import scipy.optimize
+
 import warnings
 
 from MDAnalysis.lib.log import ProgressMeter
@@ -162,7 +164,7 @@
 
 
 class HydrogenBondAutoCorrel(object):
-    """Perform a time autocorrelation of the hydrogen bonds in the system. 
+    """Perform a time autocorrelation of the hydrogen bonds in the system.
 
     Parameters
     ----------
@@ -421,8 +423,9 @@ def solve(self, p_guess=None):
             Initial guess for the leastsq fit, must match the shape of the
             expected coefficients
 
-        Continuous defition results are fitted to a double exponential,
-        intermittent definition are fit to a triple exponential.
+        Continuous defition results are fitted to a double exponential with
+        :func:`scipy.optimize.leastsq`, intermittent definition are fit to a
+        triple exponential.
 
         The results of this fitting procedure are saved into the *fit*,
         *tau* and *estimate* keywords in the solution dict.
@@ -434,14 +437,14 @@ def solve(self, p_guess=None):
          - *estimate* contains the estimate provided by the fit of the time
            autocorrelation function
 
-        In addition, the output of the leastsq function is saved into the
-        solution dict
+        In addition, the output of the :func:`~scipy.optimize.leastsq` function
+        is saved into the solution dict
 
          - *infodict*
          - *mesg*
          - *ier*
+
         """
-        from scipy.optimize import leastsq
 
         if self.solution['results'] is None:
             raise ValueError(
@@ -498,9 +501,8 @@ def triple(x, A1, A2, tau1, tau2, tau3):
             if p_guess is None:
                 p_guess = (0.5, 10 * self.sample_time, self.sample_time)
 
-            p, cov, infodict, mesg, ier = leastsq(err, p_guess,
-                                                  args=(time, results),
-                                                  full_output=True)
+                p, cov, infodict, mesg, ier = scipy.optimize.leastsq(
+                err, p_guess, args=(time, results), full_output=True)
             self.solution['fit'] = p
             A1, tau1, tau2 = p
             A2 = 1 - A1
@@ -512,9 +514,8 @@ def triple(x, A1, A2, tau1, tau2, tau3):
                 p_guess = (0.33, 0.33, 10 * self.sample_time,
                            self.sample_time, 0.1 * self.sample_time)
 
-            p, cov, infodict, mesg, ier = leastsq(err, p_guess,
-                                                  args=(time, results),
-                                                  full_output=True)
+            p, cov, infodict, mesg, ier = scipy.optimize.leastsq(
+                err, p_guess, args=(time, results), full_output=True)
             self.solution['fit'] = p
             A1, A2, tau1, tau2, tau3 = p
             A3 = 1 - A1 - A2
diff --git a/package/MDAnalysis/analysis/hole.py b/package/MDAnalysis/analysis/hole.py
index e43316a868c..7088e5f8a7c 100644
--- a/package/MDAnalysis/analysis/hole.py
+++ b/package/MDAnalysis/analysis/hole.py
@@ -245,7 +245,6 @@
 from six.moves import zip, cPickle
 import six
 
-import numpy as np
 import glob
 import os
 import errno
@@ -258,6 +257,10 @@
 import logging
 from itertools import cycle
 
+import numpy as np
+import matplotlib
+import matplotlib.pyplot as plt
+
 from MDAnalysis import Universe
 from MDAnalysis.exceptions import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
@@ -370,8 +373,6 @@ def save(self, filename="hole.pickle"):
         cPickle.dump(self.profiles, open(filename, "wb"), cPickle.HIGHEST_PROTOCOL)
 
     def _process_plot_kwargs(self, kwargs):
-        import matplotlib.colors
-
         kw = {}
         frames = kwargs.pop('frames', None)
         if frames is None:
@@ -448,9 +449,6 @@ def plot(self, **kwargs):
            Returns ``ax``.
 
         """
-
-        import matplotlib.pyplot as plt
-
         kw, kwargs = self._process_plot_kwargs(kwargs)
 
         ax = kwargs.pop('ax', None)
@@ -517,8 +515,7 @@ def plot3D(self, **kwargs):
            Returns ``ax``.
 
         """
-
-        import matplotlib.pyplot as plt
+        # installed with matplotlib; imported here to enable 3D axes
         from mpl_toolkits.mplot3d import Axes3D
 
         kw, kwargs = self._process_plot_kwargs(kwargs)
@@ -540,8 +537,7 @@ def plot3D(self, **kwargs):
                 rxncoord = profile.rxncoord
             else:
                 # does not seem to work with masked arrays but with nan hack!
-                # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value
-                # -for-the-z-axis
+                # http://stackoverflow.com/questions/4913306/python-matplotlib-mplot3d-how-do-i-set-a-maximum-value-for-the-z-axis
                 #radius = np.ma.masked_greater(profile.radius, rmax)
                 #rxncoord = np.ma.array(profile.rxncoord, mask=radius.mask)
                 rxncoord = profile.rxncoord
diff --git a/package/MDAnalysis/analysis/legacy/x3dna.py b/package/MDAnalysis/analysis/legacy/x3dna.py
index f960cd1fc5d..99cb4a8fc65 100644
--- a/package/MDAnalysis/analysis/legacy/x3dna.py
+++ b/package/MDAnalysis/analysis/legacy/x3dna.py
@@ -132,13 +132,15 @@
 import errno
 import shutil
 import warnings
-import numpy as np
 import os.path
 import subprocess
 import tempfile
 import textwrap
 from collections import OrderedDict
 
+import numpy as np
+import matplotlib.pyplot as plt
+
 from MDAnalysis import ApplicationError
 from MDAnalysis.lib.util import which, realpath, asiterable
 
@@ -413,7 +415,6 @@ def plot(self, **kwargs):
              Provide `ax` to have all plots plotted in the same axes.
 
         """
-        import matplotlib.pyplot as plt
 
         na_avg, na_std = self.mean_std()
         for k in range(len(na_avg[0])):
diff --git a/package/MDAnalysis/analysis/pca.py b/package/MDAnalysis/analysis/pca.py
index 18d729ecc08..b9920a27ed3 100644
--- a/package/MDAnalysis/analysis/pca.py
+++ b/package/MDAnalysis/analysis/pca.py
@@ -106,6 +106,7 @@
 import warnings
 
 import numpy as np
+import scipy.integrate
 
 from MDAnalysis import Universe
 from MDAnalysis.analysis.align import _fit_to
@@ -357,9 +358,9 @@ def cosine_content(pca_space, i):
     .. [BerkHess1] Berk Hess. Convergence of sampling in protein simulations.
                    Phys. Rev. E 65, 031910 (2002).
     """
-    from scipy.integrate import simps
+
     t = np.arange(len(pca_space))
     T = len(pca_space)
     cos = np.cos(np.pi * t * (i + 1) / T)
-    return ((2.0 / T) * (simps(cos*pca_space[:, i])) ** 2 /
-            simps(pca_space[:, i] ** 2))
+    return ((2.0 / T) * (scipy.integrate.simps(cos*pca_space[:, i])) ** 2 /
+            scipy.integrate.simps(pca_space[:, i] ** 2))
diff --git a/package/MDAnalysis/analysis/polymer.py b/package/MDAnalysis/analysis/polymer.py
index 645055e1878..b7babb204ab 100644
--- a/package/MDAnalysis/analysis/polymer.py
+++ b/package/MDAnalysis/analysis/polymer.py
@@ -36,6 +36,8 @@
 from six.moves import range
 
 import numpy as np
+import scipy.optimize
+
 import logging
 
 from .. import NoDataError
@@ -165,13 +167,10 @@ def fit_exponential_decay(x, y):
     -----
     This function assumes that data starts at 1.0 and decays to 0.0
 
-    Requires scipy
     """
-    from scipy.optimize import curve_fit
-
     def expfunc(x, a):
         return np.exp(-x/a)
 
-    a = curve_fit(expfunc, x, y)[0][0]
+    a = scipy.optimize.curve_fit(expfunc, x, y)[0][0]
 
     return a
diff --git a/package/MDAnalysis/analysis/psa.py b/package/MDAnalysis/analysis/psa.py
index 6c002241a40..f7178d678c1 100644
--- a/package/MDAnalysis/analysis/psa.py
+++ b/package/MDAnalysis/analysis/psa.py
@@ -216,7 +216,11 @@
 from six.moves import range, cPickle
 
 import numpy as np
-import warnings,numbers
+from scipy import spatial, cluster
+import matplotlib
+
+import warnings
+import numbers
 
 import MDAnalysis
 import MDAnalysis.analysis.align
@@ -396,13 +400,14 @@ def hausdorff(P, Q):
 
     Notes
     -----
-    - The Hausdorff distance is calculated in a brute force manner from the
-      distance matrix without further optimizations, essentially following
-      [Huttenlocher1993]_.
-    - :func:`scipy.spatial.distance.directed_hausdorff` is an optimized
-      implementation of the early break algorithm of [Taha2015]_; note that
-      one still has to calculate the *symmetric* Hausdorff distance as
-      `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`.
+    The Hausdorff distance is calculated in a brute force manner from the
+    distance matrix without further optimizations, essentially following
+    [Huttenlocher1993]_.
+
+    :func:`scipy.spatial.distance.directed_hausdorff` is an optimized
+    implementation of the early break algorithm of [Taha2015]_; note that one
+    still has to calculate the *symmetric* Hausdorff distance as
+    `max(directed_hausdorff(P, Q)[0], directed_hausdorff(Q, P)[0])`.
 
     References
     ----------
@@ -415,6 +420,10 @@ def hausdorff(P, Q):
        calculating the exact Hausdorff distance. IEEE Transactions On Pattern
        Analysis And Machine Intelligence, 37:2153-63, 2015.
 
+    SeeAlso
+    -------
+    scipy.spatial.distance.directed_hausdorff
+
     """
     N, axis = get_coord_axes(P)
     d = get_msd_matrix(P, Q, axis=axis)
@@ -1650,7 +1659,7 @@ def plot(self, filename=None, linkage='ward', count_sort=False,
 
         If `filename` is supplied then the figure is also written to file (the
         suffix determines the file type, e.g. pdf, png, eps, ...). All other
-        keyword arguments are passed on to :func:`matplotlib.pyplot.imshow`.
+        keyword arguments are passed on to :func:`matplotlib.pyplot.matshow`.
 
 
         Parameters
@@ -1669,6 +1678,15 @@ def plot(self, filename=None, linkage='ward', count_sort=False,
              set the font size for colorbar labels; font size for path labels on
              dendrogram default to 3 points smaller [``12``]
 
+        Returns
+        -------
+        Z
+          `Z` from :meth:`cluster`
+        dgram
+          `dgram` from :meth:`cluster`
+        dist_matrix_clus
+          clustered distance matrix (reordered)
+
         """
         from matplotlib.pyplot import figure, colorbar, cm, savefig, clf
 
@@ -1770,6 +1788,23 @@ def plot_annotated_heatmap(self, filename=None, linkage='ward',             \
         annot_size : float
              font size of annotation labels on heat map [``6.5``]
 
+        Returns
+        -------
+        Z
+          `Z` from :meth:`cluster`
+        dgram
+          `dgram` from :meth:`cluster`
+        dist_matrix_clus
+          clustered distance matrix (reordered)
+
+
+        Note
+        ----
+        This function requires the seaborn_ package, which can be installed
+        with `pip install seaborn` or `conda install seaborn`.
+
+        .. _seaborn: https://seaborn.pydata.org/
+
         """
         from matplotlib.pyplot import figure, colorbar, cm, savefig, clf
 
@@ -1870,6 +1905,17 @@ def plot_nearest_neighbors(self, filename=None, idx=0,                      \
              set the font size for colorbar labels; font size for path labels on
              dendrogram default to 3 points smaller [``12``]
 
+        Returns
+        -------
+        ax : axes
+
+        Note
+        ----
+        This function requires the seaborn_ package, which can be installed
+        with `pip install seaborn` or `conda install seaborn`.
+
+        .. _seaborn: https://seaborn.pydata.org/
+
         """
         from matplotlib.pyplot import figure, savefig, tight_layout, clf, show
         try:
@@ -1927,7 +1973,8 @@ def plot_nearest_neighbors(self, filename=None, idx=0,                      \
             head = self.targetdir + self.datadirs['plots']
             outfile = os.path.join(head, filename)
             savefig(outfile, dpi=300, bbox_inches='tight')
-        show()
+
+        return ax
 
 
     def cluster(self, distArray, method='ward', count_sort=False,               \
@@ -1955,22 +2002,28 @@ def cluster(self, distArray, method='ward', count_sort=False,               \
 
         Returns
         -------
-        list
+        Z
+            output from :func:`scipy.cluster.hierarchy.linkage`;
             list of indices representing the row-wise order of the objects
             after clustering
+        dgram
+            output from :func:`scipy.cluster.hierarchy.dendrogram`
         """
-        import matplotlib
-        from scipy.cluster.hierarchy import linkage, dendrogram
-
+        # perhaps there is a better way to manipulate the plot... or perhaps it
+        # is not even necessary? In any case, the try/finally makes sure that
+        # we are not permanently changing the user's global state
+        orig_linewidth = matplotlib.rcParams['lines.linewidth']
         matplotlib.rcParams['lines.linewidth'] = 0.5
-
-        Z = linkage(distArray, method=method)
-        dgram = dendrogram(Z, no_labels=no_labels, orientation='left',          \
-                           count_sort=count_sort, distance_sort=distance_sort,  \
-                           no_plot=no_plot, color_threshold=color_threshold)
+        try:
+            Z = cluster.hierarchy.linkage(distArray, method=method)
+            dgram = cluster.hierarchy.dendrogram(
+                Z, no_labels=no_labels, orientation='left',
+                count_sort=count_sort, distance_sort=distance_sort,
+                no_plot=no_plot, color_threshold=color_threshold)
+        finally:
+            matplotlib.rcParams['lines.linewidth'] = orig_linewidth
         return Z, dgram
 
-
     def _get_plot_obj_locs(self):
         """Find and return coordinates for dendrogram, heat map, and colorbar.
 
@@ -2005,7 +2058,8 @@ def get_num_atoms(self):
 
         Returns
         -------
-        the number of atoms
+        int
+            the number of atoms
 
         Note
         ----
@@ -2077,8 +2131,7 @@ def get_pairwise_distances(self, vectorform=False):
             err_str = "No distance data; do 'PSAnalysis.run(store=True)' first."
             raise ValueError(err_str)
         if vectorform:
-            from scipy.spatial.distance import squareform
-            return squareform(self.D)
+            return spatial.distance.squareform(self.D)
         else:
             return self.D