From 3800f32ae45c4ccfe8aa3221d5996b9e61363881 Mon Sep 17 00:00:00 2001
From: Thomas Boggs <tboggs@users.sourceforge.net>
Date: Tue, 18 Mar 2014 00:21:23 -0400
Subject: [PATCH] Handle np.nan in image data [closes #2].

---
 CHANGELOG.txt                     | 20 ++++++++++++++++++++
 spectral/algorithms/algorithms.py | 11 ++++++++++-
 spectral/algorithms/clustering.py | 28 ++++++++++++++++++----------
 spectral/algorithms/spymath.py    | 12 ++++++++++++
 spectral/io/spyfile.py            |  7 ++++++-
 5 files changed, 66 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.txt b/CHANGELOG.txt
index e4a6303..9cfda82 100644
--- a/CHANGELOG.txt
+++ b/CHANGELOG.txt
@@ -2,6 +2,26 @@
 # Spectral Python (SPy) package change log
 ##############################################################################
 
+2014-03-18	tboggs <tboggs@users.sourceforge.net>
+
+    [closes #2] Handle image data containing NaN. Rather than just passing
+    image data through to SPy functions, several checks are now made to
+    either warn or raise an exception when `np.nan` is found in the data.
+
+    * algorithms/algorithms.py (calc_stats): New argument `allow_nan` (False
+      by default) indicates whether nan should be allowed in the data. If
+      False, a NaNValueError is raised when nan is present.
+    * algorithms/clustering.py:
+      (kmeans): Make sure status percentage ends in all code paths.
+      (kmeans_ndarray): Make sure status percentage ends in all code paths.
+      Raise NaNValueError if nan is present.
+    * algorithms/spymath.py:
+      (NaNValueWarning): New class
+      (NaNValueError): New class
+      (has_nan): New function to determine if np.nan is present in the data.
+    * io/spyfile.py (SpyFile.load): Issue NaNValueWarning if np.nan is present
+      in the image data.
+
 2014-03-11	tboggs <tboggs@users.sourceforge.net>
 
     [fixed #1] - Pixel row/col is not displayed in imshow window when pixel
diff --git a/spectral/algorithms/algorithms.py b/spectral/algorithms/algorithms.py
index 5477b6f..d122633 100644
--- a/spectral/algorithms/algorithms.py
+++ b/spectral/algorithms/algorithms.py
@@ -723,7 +723,7 @@ def get_whitening_transform(self):
         return LinearTransform(matrix_sqrt(C_1, True), pre=-self.mean)
 
 
-def calc_stats(image, mask=None, index=None):
+def calc_stats(image, mask=None, index=None, allow_nan=False):
     '''Computes Gaussian stats for image data..
 
     Arguments:
@@ -746,6 +746,12 @@ def calc_stats(image, mask=None, index=None):
             `image`. If not specified but `mask` is, then all nonzero elements
             of `mask` will be used.
 
+        `allow_nan` (bool, default False):
+
+            If True, statistics will be computed even if `np.nan` values are
+            present in the data; otherwise, `~spectral.algorithms.spymath.NaNValueError`
+            is raised.
+
         If neither `mask` nor `index` are specified, all samples in `vectors`
         will be used.
 
@@ -755,7 +761,10 @@ def calc_stats(image, mask=None, index=None):
 
             This object will have members `mean`, `cov`, and `nsamples`.
     '''
+    from spectral.algorithms.spymath import has_nan, NaNValueError
     (mean, cov, N) = mean_cov(image, mask, index)
+    if has_nan(mean) and not allow_nan:
+        raise NaNValueError('NaN values present in data.')
     return GaussianStats(mean=mean, cov=cov, nsamples=N)
 
 
diff --git a/spectral/algorithms/clustering.py b/spectral/algorithms/clustering.py
index 2219dcd..33222cf 100644
--- a/spectral/algorithms/clustering.py
+++ b/spectral/algorithms/clustering.py
@@ -217,10 +217,10 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs):
         for i in range(nclusters):
             centers[i] = boxMin.astype(float) + i * delta
 
-    iter = 1
-    while (iter <= max_iterations):
+    itnum = 1
+    while (itnum <= max_iterations):
         try:
-            status.display_percentage('Iteration %d...' % iter)
+            status.display_percentage('Iteration %d...' % itnum)
 
             # Assign all pixels
             for i in range(nrows):
@@ -251,10 +251,12 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs):
                 iterations.append(clusters)
 
             if compare and compare(old_clusters, clusters):
+                status.end_percentage('done.')
                 break
             else:
                 nChanged = numpy.sum(clusters != old_clusters)
                 if nChanged == 0:
+                    status.end_percentage('0 pixels reassigned.')
                     break
                 else:
                     status.end_percentage('%d pixels reassigned.' \
@@ -263,14 +265,14 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs):
             old_clusters = clusters
             old_centers = centers
             clusters = numpy.zeros((nrows, ncols), int)
-            iter += 1
+            itnum += 1
 
         except KeyboardInterrupt:
             print "KeyboardInterrupt: Returning clusters from previous iteration"
             return (old_clusters, old_centers)
 
     print >>status, 'kmeans terminated with', len(set(old_clusters.ravel())), \
-        'clusters after', iter - 1, 'iterations.'
+        'clusters after', itnum - 1, 'iterations.'
     return (old_clusters, centers)
 
 
@@ -339,6 +341,10 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs):
     '''
     import spectral
     import numpy as np
+    from spectral.algorithms.spymath import has_nan, NaNValueError
+
+    if has_nan(image):
+        raise NaNValueError('Image data contains NaN values.')
 
     status = spectral._status
     
@@ -390,10 +396,10 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs):
     clusters = np.zeros((N,), int)
     old_clusters = np.copy(clusters)
     diffs = np.empty_like(image, dtype=np.float64)
-    iter = 1
-    while (iter <= max_iterations):
+    itnum = 1
+    while (itnum <= max_iterations):
         try:
-            status.display_percentage('Iteration %d...' % iter)
+            status.display_percentage('Iteration %d...' % itnum)
 
             # Assign all pixels
             for i in range(nclusters):
@@ -416,10 +422,12 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs):
                 iterations.append(clusters.reshape(nrows, ncols))
 
             if compare and compare(old_clusters, clusters):
+                status.end_percentage('done.')
                 break
             else:
                 nChanged = numpy.sum(clusters != old_clusters)
                 if nChanged == 0:
+                    status.end_percentage('0 pixels reassigned.')
                     break
                 else:
                     status.end_percentage('%d pixels reassigned.' \
@@ -427,14 +435,14 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs):
 
             old_clusters[:] = clusters
             old_centers[:] = centers
-            iter += 1
+            itnum += 1
 
         except KeyboardInterrupt:
             print "KeyboardInterrupt: Returning clusters from previous iteration."
             return (old_clusters.reshape(nrows, ncols), old_centers)
 
     print >>status, 'kmeans terminated with', len(set(old_clusters.ravel())), \
-        'clusters after', iter - 1, 'iterations.'
+        'clusters after', itnum - 1, 'iterations.'
     return (old_clusters.reshape(nrows, ncols), centers)
 
 
diff --git a/spectral/algorithms/spymath.py b/spectral/algorithms/spymath.py
index 8743918..a4839b9 100644
--- a/spectral/algorithms/spymath.py
+++ b/spectral/algorithms/spymath.py
@@ -76,3 +76,15 @@ def matrix_sqrt(X=None, symmetric=False, inverse=False, eigs=None):
         return V.dot(SRV).dot(V.T)
     else:
         return V.dot(SRV).dot(np.linalg.inv(V))
+
+import exceptions
+
+class NaNValueWarning(exceptions.UserWarning):
+    pass
+
+class NaNValueError(exceptions.ValueError):
+    pass
+
+def has_nan(X):
+    '''returns True if ndarray `X` contains a NaN value.'''
+    return bool(np.isnan(np.min(X)))
diff --git a/spectral/io/spyfile.py b/spectral/io/spyfile.py
index fa68b76..3020531 100644
--- a/spectral/io/spyfile.py
+++ b/spectral/io/spyfile.py
@@ -217,6 +217,8 @@ def load(self, **kwargs):
         import spectral
         from spectral.spectral import ImageArray
         from array import array
+        import warnings
+        from spectral.algorithms.spymath import has_nan, NaNValueWarning
 
         for k in kwargs.keys():
             if k not in ('dtype', 'scale'):
@@ -238,7 +240,10 @@ def load(self, **kwargs):
         npArray = npArray.astype(dtype)
         if self.scale_factor != 1 and kwargs.get('scale', True):
             npArray = npArray / float(self.scale_factor)
-        return ImageArray(npArray, self)
+        imarray = ImageArray(npArray, self)
+        if has_nan(imarray):
+            warnings.warn('Image data contains NaN values.', NaNValueWarning)
+        return imarray        
 
     def __getitem__(self, args):
         '''Subscripting operator that provides a numpy-like interface.