diff --git a/README.md b/README.md index 050d427..fc97549 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ X_obs_nc = aakr.transform(X_obs) ## References -* [Assessment of Statistical and Classification Models For Monitoring EDF’s Assets”](https://link.springer.com/chapter/10.1007/978-0-85729-320-6_52) +* [Assessment of Statistical and Classification Models For Monitoring EDF’s Assets](https://link.springer.com/chapter/10.1007/978-0-85729-320-6_52) * [A modified Auto Associative Kernel Regression method for robust signal reconstruction in nuclear power plant components](https://www.researchgate.net/publication/292538769_A_modified_Auto_Associative_Kernel_Regression_method_for_robust_signal_reconstruction_in_nuclear_power_plant_components) diff --git a/VERSION b/VERSION index 217a8ec..bf6f03e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.0.1dev6 \ No newline at end of file +0.0.1a \ No newline at end of file diff --git a/aakr/_aakr.py b/aakr/_aakr.py index ce6c0f3..14a25c1 100644 --- a/aakr/_aakr.py +++ b/aakr/_aakr.py @@ -17,7 +17,15 @@ class AAKR(TransformerMixin, BaseEstimator): Metric for calculating kernel distances, see available metrics from `sklearn.metrics.pairwise_distances `_. bw : float, default=1.0 - Kernel bandwith parameter. + Gaussian Radial Basis Function (RBF) bandwidth parameter. + modified : bool, default=False + Whether to use the modified version of AAKR (see reference [2]). The + modified version reduces the contribution provided by those signals + which are expected to be subject to the abnormal conditions. + penalty : array-like or list of shape (n_features, 1) or None, default=None + Penalty vector for the modified AAKR - only used when parameter + modified=True. If modified AAKR is used and penalty=None, penalty + vector is automatically determined. n_jobs : int, default=-1 The number of jobs to run in parallel. 
@@ -37,11 +45,40 @@ class AAKR(TransformerMixin, BaseEstimator): signal reconstruction in nuclear power plant components", European Safety and Reliability Conference ESREL. """ - def __init__(self, metric='euclidean', bw=1, n_jobs=-1): + def __init__(self, metric='euclidean', bw=1., modified=False, penalty=None, + n_jobs=-1): self.metric = metric self.bw = bw + self.modified = modified + self.penalty = penalty self.n_jobs = n_jobs - # TODO: Implement modified -version + + def _fit_validation(self, X): + """Validate input and penalty settings; return the validated X.""" + X = check_array(X) + + if self.modified: + if self.penalty is not None: + penalty = check_array(self.penalty, ensure_2d=False) + if len(penalty) != X.shape[1]: + raise ValueError('Shape of input is different from what ' + 'is defined in penalty vector (' + f'{X.shape[1]} != {len(penalty)})') + elif not self.modified and self.penalty is not None: + raise ValueError('Parameter `penalty` given, but `modified=False`.' + ' Please set `modified=True` to make use of the ' + 'penalty vector, or set `penalty=None`.') + + return X + + def _rbf_kernel(self, X_obs_nc, X_obs): + # Kernel regression + D = pairwise_distances(X=X_obs_nc, Y=X_obs, + metric=self.metric, n_jobs=self.n_jobs) + k = 1 / np.sqrt(2 * np.pi * self.bw ** 2) + w = k * np.exp(-D ** 2 / (2 * self.bw ** 2)) + + return w def fit(self, X, y=None): """Fit normal condition examples. @@ -59,9 +96,10 @@ def fit(self, X, y=None): Returns self. """ # Validation - X = check_array(X) + X = self._fit_validation(X) - # Save history + # Fit = save history + # TODO: Add pruning options as a parameter... sampling? self.X_ = X return self @@ -82,7 +120,7 @@ def partial_fit(self, X, y=None): Returns self. """ # Validation - X = check_array(X) + X = self._fit_validation(X) # Fit if hasattr(self, 'X_'): @@ -95,7 +133,7 @@ def partial_fit(self, X, y=None): return self - def transform(self, X, **kwargs): + def transform(self, X): """Transform given array into expected values in normal conditions. 
Parameters @@ -117,12 +152,51 @@ def transform(self, X, **kwargs): raise ValueError('Shape of input is different from what was seen' 'in `fit`') - # Kernel regression - D = pairwise_distances(X=self.X_, Y=X, metric=self.metric, - n_jobs=self.n_jobs, **kwargs) - k = 1 / np.sqrt(2 * np.pi * self.bw ** 2) - w = k * np.exp(-D ** 2 / (2 * self.bw ** 2)) - w_sum = w.sum(0) - X_nc = w.T.dot(self.X_) / np.where(w_sum == 0, 1, w_sum)[:, None] + # Modified AAKR basically sorts the columns + # TODO: Needs to be verified that everything here is correct + if self.modified: + X_obs_nc = self.X_ + X_nc = np.zeros(X.shape) + + # Penalty matrix (J x J, where J is the number of features) + if self.penalty is None: + D = np.diag(np.arange(X.shape[1]) + 1) ** 2. + D /= D.sum() + else: + D = np.diag(self.penalty).astype('float') + + # Feature-wise std of the history, computed once; constant features + # get a divisor of 1 so they do not cause a division by zero + std = X_obs_nc.std(0) + std = np.where(std == 0, 1, std) + + for i, X_obs in enumerate(X): # TODO: Vectorize + # Standardized contributions in decreasing order (J, 1) + diff = (np.abs(X_obs - X_obs_nc) / std).sum(0) + order = diff.argsort()[::-1] + + # Historical examples with ordered signals and penalty applied + # (N_obs_nc x J) + row_selector = np.arange(len(X_obs_nc))[:, np.newaxis] + X_obs_nc_new = X_obs_nc[row_selector, order].dot(D) + + # New observations with ordered features and penalty applied + # (1 x J) + X_obs_new = X_obs[order].dot(D)[np.newaxis, :] + + # Weights for each observation (N_obs_nc, 1) + w = self._rbf_kernel(X_obs_nc_new, X_obs_new) + + # Apply kernel and save the results (1, J) + w_sum = w.sum(0) + w_div = np.where(w_sum == 0, 1, w_sum)[:, np.newaxis] + + X_nc[i, :] = w.T.dot(X_obs_nc) / w_div + else: + w = self._rbf_kernel(self.X_, X) + w_sum = w.sum(0) + w_div = np.where(w_sum == 0, 1, w_sum)[:, np.newaxis] + + X_nc = w.T.dot(self.X_) / w_div return X_nc diff --git a/setup.py b/setup.py index e5934b5..903b6bc 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ author_email='jesse.myrberg@gmail.com', url='https://github.com/jmyrberg/aakr', keywords=['aakr', 'auto', 
'associative', 'kernel', 'regression', 'anomaly', - 'detection'], + 'detection', 'signal', 'reconstruction'], install_requires=[ 'numpy>=1.19.4', 'pandas>=1.1.5', @@ -27,7 +27,7 @@ packages=setuptools.find_packages(), include_package_data=True, classifiers=[ - 'Development Status :: 2 - Pre-Alpha', + 'Development Status :: 3 - Alpha', 'Programming Language :: Python :: 3', 'License :: OSI Approved :: MIT License', 'Intended Audience :: Science/Research', @@ -43,7 +43,8 @@ extras_require={ 'tests': [ 'pytest', - 'pytest-cov'], + 'pytest-cov' + ], 'docs': [ 'sphinx', 'sphinx_rtd_theme', diff --git a/tests/test_aakr.py b/tests/test_aakr.py index 53b5194..2fb7e3e 100644 --- a/tests/test_aakr.py +++ b/tests/test_aakr.py @@ -4,7 +4,11 @@ import pytest from sklearn.datasets import load_linnerud -from sklearn.utils.testing import assert_allclose + +try: # scikit-learn < 0.24.0 + from sklearn.utils.testing import assert_allclose +except ModuleNotFoundError: # scikit-learn >= 0.24.0 + from sklearn.utils._testing import assert_allclose from aakr import AAKR @@ -19,6 +23,8 @@ def test_aakr(data): aakr = AAKR() assert aakr.metric == 'euclidean' assert aakr.bw == 1 + assert not aakr.modified + assert aakr.penalty is None assert aakr.n_jobs == -1 aakr.fit(X) @@ -44,3 +50,27 @@ def test_aakr_partial_fit_input_shape_mismatch(data): with pytest.raises(ValueError, match='Shape of input is different'): aakr.partial_fit(X[:, :-1]) + + +def test_aakr_modified(data): + X = data[0] + + # Modified, no penalty given + aakr = AAKR(modified=True, penalty=None) + X_nc = aakr.fit(X).transform(X[:3]) + assert hasattr(aakr, 'X_') + assert_allclose(X_nc, X[:3], atol=1.) + + # Modified, penalty given + aakr = AAKR(modified=True, penalty=[1] * X.shape[1]) + X_nc = aakr.fit(X).transform(X[:3]) + assert hasattr(aakr, 'X_') + assert_allclose(X_nc, X[:3], atol=1.) 
+ + # Modified, penalty given, mismatch with input data + with pytest.raises(ValueError, match='Shape of input is different from'): + AAKR(modified=True, penalty=[1] * (X.shape[1] - 1)).fit(X) + + # No modified, penalty given + with pytest.raises(ValueError, match='Parameter `penalty` given, but'): + AAKR(modified=False, penalty=[1] * X.shape[1]).fit(X)