From 3800f32ae45c4ccfe8aa3221d5996b9e61363881 Mon Sep 17 00:00:00 2001 From: Thomas Boggs Date: Tue, 18 Mar 2014 00:21:23 -0400 Subject: [PATCH] Handle np.nan in image data [closes #2]. --- CHANGELOG.txt | 20 ++++++++++++++++++++ spectral/algorithms/algorithms.py | 11 ++++++++++- spectral/algorithms/clustering.py | 28 ++++++++++++++++++---------- spectral/algorithms/spymath.py | 12 ++++++++++++ spectral/io/spyfile.py | 7 ++++++- 5 files changed, 66 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e4a6303..9cfda82 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -2,6 +2,26 @@ # Spectral Python (SPy) package change log ############################################################################## +2014-03-18 tboggs + + [closes #2] Handle image data containing NaN. Rather than just passing + image data through to SPy functions, several checks are now made to + either warn or raise an exception when `np.nan` is found in the data. + + * algorithms/algorithms.py (calc_stats): New argument `allow_nan` (False + by default) indicates whether nan should be allowed in the data. If + False, a NaNValueError is raised when nan is present. + * algorithms/clustering.py: + (kmeans): Make sure status percentage ends in all code paths. + (kmeans_ndarray): Make sure status percentage ends in all code paths. + Raise NaNValueError if nan is present. + * algorithms/spymath.py: + (NaNValueWarning): New class + (NaNValueError): New class + (has_nan): New function to determine if np.nan is present in the data. + * io/spyfile.py (SpyFile.load): Issue NaNValueWarning if np.nan is present + in the image data. 
+ 2014-03-11 tboggs [fixed #1] - Pixel row/col is not displayed in imshow window when pixel diff --git a/spectral/algorithms/algorithms.py b/spectral/algorithms/algorithms.py index 5477b6f..d122633 100644 --- a/spectral/algorithms/algorithms.py +++ b/spectral/algorithms/algorithms.py @@ -723,7 +723,7 @@ def get_whitening_transform(self): return LinearTransform(matrix_sqrt(C_1, True), pre=-self.mean) -def calc_stats(image, mask=None, index=None): +def calc_stats(image, mask=None, index=None, allow_nan=False): '''Computes Gaussian stats for image data.. Arguments: @@ -746,6 +746,12 @@ def calc_stats(image, mask=None, index=None): `image`. If not specified but `mask` is, then all nonzero elements of `mask` will be used. + `allow_nan` (bool, default False): + + If True, statistics will be computed even if `np.nan` values are + present in the data; otherwise, `~spectral.algorithms.spymath.NaNValueError` + is raised. + If neither `mask` nor `index` are specified, all samples in `vectors` will be used. @@ -755,7 +761,10 @@ def calc_stats(image, mask=None, index=None): This object will have members `mean`, `cov`, and `nsamples`. ''' + from spectral.algorithms.spymath import has_nan, NaNValueError (mean, cov, N) = mean_cov(image, mask, index) + if has_nan(mean) and not allow_nan: + raise NaNValueError('NaN values present in data.') return GaussianStats(mean=mean, cov=cov, nsamples=N) diff --git a/spectral/algorithms/clustering.py b/spectral/algorithms/clustering.py index 2219dcd..33222cf 100644 --- a/spectral/algorithms/clustering.py +++ b/spectral/algorithms/clustering.py @@ -217,10 +217,10 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs): for i in range(nclusters): centers[i] = boxMin.astype(float) + i * delta - iter = 1 - while (iter <= max_iterations): + itnum = 1 + while (itnum <= max_iterations): try: - status.display_percentage('Iteration %d...' % iter) + status.display_percentage('Iteration %d...' 
% itnum) # Assign all pixels for i in range(nrows): @@ -251,10 +251,12 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs): iterations.append(clusters) if compare and compare(old_clusters, clusters): + status.end_percentage('done.') break else: nChanged = numpy.sum(clusters != old_clusters) if nChanged == 0: + status.end_percentage('0 pixels reassigned.') break else: status.end_percentage('%d pixels reassigned.' \ @@ -263,14 +265,14 @@ def kmeans(image, nclusters=10, max_iterations=20, **kwargs): old_clusters = clusters old_centers = centers clusters = numpy.zeros((nrows, ncols), int) - iter += 1 + itnum += 1 except KeyboardInterrupt: print "KeyboardInterrupt: Returning clusters from previous iteration" return (old_clusters, old_centers) print >>status, 'kmeans terminated with', len(set(old_clusters.ravel())), \ - 'clusters after', iter - 1, 'iterations.' + 'clusters after', itnum - 1, 'iterations.' return (old_clusters, centers) @@ -339,6 +341,10 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs): ''' import spectral import numpy as np + from spectral.algorithms.spymath import has_nan, NaNValueError + + if has_nan(image): + raise NaNValueError('Image data contains NaN values.') status = spectral._status @@ -390,10 +396,10 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs): clusters = np.zeros((N,), int) old_clusters = np.copy(clusters) diffs = np.empty_like(image, dtype=np.float64) - iter = 1 - while (iter <= max_iterations): + itnum = 1 + while (itnum <= max_iterations): try: - status.display_percentage('Iteration %d...' % iter) + status.display_percentage('Iteration %d...' 
% itnum) # Assign all pixels for i in range(nclusters): @@ -416,10 +422,12 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs): iterations.append(clusters.reshape(nrows, ncols)) if compare and compare(old_clusters, clusters): + status.end_percentage('done.') break else: nChanged = numpy.sum(clusters != old_clusters) if nChanged == 0: + status.end_percentage('0 pixels reassigned.') break else: status.end_percentage('%d pixels reassigned.' \ @@ -427,14 +435,14 @@ def kmeans_ndarray(image, nclusters=10, max_iterations=20, **kwargs): old_clusters[:] = clusters old_centers[:] = centers - iter += 1 + itnum += 1 except KeyboardInterrupt: print "KeyboardInterrupt: Returning clusters from previous iteration." return (old_clusters.reshape(nrows, ncols), old_centers) print >>status, 'kmeans terminated with', len(set(old_clusters.ravel())), \ - 'clusters after', iter - 1, 'iterations.' + 'clusters after', itnum - 1, 'iterations.' return (old_clusters.reshape(nrows, ncols), centers) diff --git a/spectral/algorithms/spymath.py b/spectral/algorithms/spymath.py index 8743918..a4839b9 100644 --- a/spectral/algorithms/spymath.py +++ b/spectral/algorithms/spymath.py @@ -76,3 +76,15 @@ def matrix_sqrt(X=None, symmetric=False, inverse=False, eigs=None): return V.dot(SRV).dot(V.T) else: return V.dot(SRV).dot(np.linalg.inv(V)) + +import exceptions + +class NaNValueWarning(exceptions.UserWarning): + pass + +class NaNValueError(exceptions.ValueError): + pass + +def has_nan(X): + '''returns True if ndarray `X` contains a NaN value.''' + return bool(np.isnan(np.min(X))) diff --git a/spectral/io/spyfile.py b/spectral/io/spyfile.py index fa68b76..3020531 100644 --- a/spectral/io/spyfile.py +++ b/spectral/io/spyfile.py @@ -217,6 +217,8 @@ def load(self, **kwargs): import spectral from spectral.spectral import ImageArray from array import array + import warnings + from spectral.algorithms.spymath import has_nan, NaNValueWarning for k in kwargs.keys(): if k not in ('dtype', 
'scale'): @@ -238,7 +240,10 @@ def load(self, **kwargs): npArray = npArray.astype(dtype) if self.scale_factor != 1 and kwargs.get('scale', True): npArray = npArray / float(self.scale_factor) - return ImageArray(npArray, self) + imarray = ImageArray(npArray, self) + if has_nan(imarray): + warnings.warn('Image data contains NaN values.', NaNValueWarning) + return imarray def __getitem__(self, args): '''Subscripting operator that provides a numpy-like interface.