Skip to content

Commit

Permalink
Merge pull request #1 from jmyrberg/modified-aakr
Browse files Browse the repository at this point in the history
Added modified AAKR
  • Loading branch information
jmyrberg authored Dec 29, 2020
2 parents 88dc468 + 9528f70 commit 5fa7c93
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 20 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ X_obs_nc = aakr.transform(X_obs)

## References

* [Assessment of Statistical and Classification Models For Monitoring EDF’s Assets](https://link.springer.com/chapter/10.1007/978-0-85729-320-6_52)
* [Assessment of Statistical and Classification Models For Monitoring EDF’s Assets](https://link.springer.com/chapter/10.1007/978-0-85729-320-6_52)

* [A modified Auto Associative Kernel Regression method for robust signal reconstruction in nuclear power plant components](https://www.researchgate.net/publication/292538769_A_modified_Auto_Associative_Kernel_Regression_method_for_robust_signal_reconstruction_in_nuclear_power_plant_components)

Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.1dev6
0.0.1a
97 changes: 83 additions & 14 deletions aakr/_aakr.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@ class AAKR(TransformerMixin, BaseEstimator):
Metric for calculating kernel distances, see available metrics from
`sklearn.metrics.pairwise_distances <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html>`_.
bw : float, default=1.0
Kernel bandwith parameter.
Gaussian Radial Basis Function (RBF) bandwidth parameter.
modified : bool, default=False
Whether to use the modified version of AAKR (see reference [2]). The
modified version reduces the contribution provided by those signals
which are expected to be subject to the abnormal conditions.
penalty : array-like of shape (n_features,) or None, default=None
Penalty vector for the modified AAKR - only used when parameter
modified=True. If the modified AAKR is used and penalty=None, the
penalty vector is determined automatically.
n_jobs : int, default=-1
The number of jobs to run in parallel.
Expand All @@ -37,11 +45,37 @@ class AAKR(TransformerMixin, BaseEstimator):
signal reconstruction in nuclear power plant components", European
Safety and Reliability Conference ESREL.
"""
def __init__(self, metric='euclidean', bw=1, n_jobs=-1):
def __init__(self, metric='euclidean', bw=1., modified=False, penalty=None,
             n_jobs=-1):
    # Only store the hyperparameters exactly as given; nothing is
    # validated or computed at construction time.
    self.n_jobs = n_jobs
    self.penalty = penalty
    self.modified = modified
    self.bw = bw
    self.metric = metric
# TODO: Implement modified -version

def _fit_validation(self, X):
    """Validate training data and the `modified` / `penalty` combination.

    Parameters
    ----------
    X : array-like
        Training data; passed through ``check_array``.

    Returns
    -------
    X : ndarray
        The array returned by ``check_array``. Returned so that callers
        can keep the validated array instead of the raw input; callers
        that ignore the return value are unaffected.

    Raises
    ------
    ValueError
        If `penalty` is given while `modified` is falsy, or if the length
        of `penalty` differs from the number of columns in `X`.
    """
    X = check_array(X)

    # Nothing else to validate when no penalty vector was supplied.
    if self.penalty is None:
        return X

    # A penalty vector only makes sense for the modified AAKR.
    if not self.modified:
        raise ValueError('Parameter `penalty` given, but `modified=False`. '
                         'Please set `modified=True` to make use of the '
                         'penalty vector, or set `penalty=None`.')

    # The penalty vector needs one entry per input column.
    penalty = check_array(self.penalty, ensure_2d=False)
    if len(penalty) != X.shape[1]:
        raise ValueError('Shape of input is different from what '
                         'is defined in penalty vector ('
                         f'{X.shape[1]} != {len(penalty)})')

    return X

def _rbf_kernel(self, X_obs_nc, X_obs):
    """Compute Gaussian RBF kernel weights between two sets of rows.

    Pairwise distances between the rows of `X_obs_nc` and the rows of
    `X_obs` are computed with ``self.metric`` (using ``self.n_jobs``
    workers) and passed through a Gaussian kernel with bandwidth
    ``self.bw``.

    Parameters
    ----------
    X_obs_nc : array-like
        First argument (``X``) given to ``pairwise_distances``.
    X_obs : array-like
        Second argument (``Y``) given to ``pairwise_distances``.

    Returns
    -------
    ndarray
        Kernel weights with the same shape as the distance matrix.
    """
    distances = pairwise_distances(X=X_obs_nc, Y=X_obs,
                                   metric=self.metric, n_jobs=self.n_jobs)

    # Gaussian density: normalizing constant times the exponential term.
    normalizer = 1 / np.sqrt(2 * np.pi * self.bw ** 2)
    denominator = 2 * self.bw ** 2
    return normalizer * np.exp(-(distances ** 2) / denominator)

def fit(self, X, y=None):
"""Fit normal condition examples.
Expand All @@ -59,9 +93,10 @@ def fit(self, X, y=None):
Returns self.
"""
# Validation
X = check_array(X)
self._fit_validation(X)

# Save history
# Fit = save history
# TODO: Add pruning options as a parameter... sampling?
self.X_ = X

return self
Expand All @@ -82,7 +117,7 @@ def partial_fit(self, X, y=None):
Returns self.
"""
# Validation
X = check_array(X)
self._fit_validation(X)

# Fit
if hasattr(self, 'X_'):
Expand All @@ -95,7 +130,7 @@ def partial_fit(self, X, y=None):

return self

def transform(self, X, **kwargs):
def transform(self, X):
"""Transform given array into expected values in normal conditions.
Parameters
Expand All @@ -117,12 +152,46 @@ def transform(self, X, **kwargs):
raise ValueError('Shape of input is different from what was seen'
'in `fit`')

# Kernel regression
D = pairwise_distances(X=self.X_, Y=X, metric=self.metric,
n_jobs=self.n_jobs, **kwargs)
k = 1 / np.sqrt(2 * np.pi * self.bw ** 2)
w = k * np.exp(-D ** 2 / (2 * self.bw ** 2))
w_sum = w.sum(0)
X_nc = w.T.dot(self.X_) / np.where(w_sum == 0, 1, w_sum)[:, None]
# Modified AAKR basically sorts the columns
# TODO: Needs to be verified that everything here is correct
if self.modified:
X_obs_nc = self.X_
X_nc = np.zeros(X.shape)

# Penalty matrix (J x J, where J is the number of features)
if self.penalty is None:
D = np.diag(np.arange(X.shape[1]) + 1) ** 2.
D /= D.sum()
else:
D = np.diag(self.penalty).astype('float')

for i, X_obs in enumerate(X): # TODO: Vectorize
# Standardized contributions in decreasing order (J, 1)
diff = (np.abs(X_obs - X_obs_nc) / X_obs_nc.std(0)).sum(0)
order = diff.argsort()[::-1]

# Historical examples with ordered signals and penalty applied
# (N_obs_nc x J)
row_selector = np.arange(len(X_obs_nc))[:, np.newaxis]
X_obs_nc_new = X_obs_nc[row_selector, order].dot(D)

# New observations with ordered features and penalty applied
# (1 x J)
X_obs_new = X_obs[order].dot(D)[np.newaxis, :]

# Weights for each observation (N_obs_nc, 1)
w = self._rbf_kernel(X_obs_nc_new, X_obs_new)

# Apply kernel and save the results (1, J)
w_sum = w.sum(0)
w_div = np.where(w_sum == 0, 1, w_sum)[:, np.newaxis]

X_nc[i, :] = w.T.dot(X_obs_nc) / w_div
else:
w = self._rbf_kernel(self.X_, X)
w_sum = w.sum(0)
w_div = np.where(w_sum == 0, 1, w_sum)[:, np.newaxis]

X_nc = w.T.dot(self.X_) / w_div

return X_nc
7 changes: 4 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
author_email='jesse.myrberg@gmail.com',
url='https://github.com/jmyrberg/aakr',
keywords=['aakr', 'auto', 'associative', 'kernel', 'regression', 'anomaly',
'detection'],
'detection', 'signal', 'reconstruction'],
install_requires=[
'numpy>=1.19.4',
'pandas>=1.1.5',
Expand All @@ -27,7 +27,7 @@
packages=setuptools.find_packages(),
include_package_data=True,
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Development Status :: 3 - Alpha',
'Programming Language :: Python :: 3',
'License :: OSI Approved :: MIT License',
'Intended Audience :: Science/Research',
Expand All @@ -43,7 +43,8 @@
extras_require={
'tests': [
'pytest',
'pytest-cov'],
'pytest-cov'
],
'docs': [
'sphinx',
'sphinx_rtd_theme',
Expand Down
32 changes: 31 additions & 1 deletion tests/test_aakr.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
import pytest

from sklearn.datasets import load_linnerud
from sklearn.utils.testing import assert_allclose

try: # scikit-learn < 0.24.0
from sklearn.utils.testing import assert_allclose
except ModuleNotFoundError: # scikit-learn >= 0.24.0
from sklearn.utils._testing import assert_allclose

from aakr import AAKR

Expand All @@ -19,6 +23,8 @@ def test_aakr(data):
aakr = AAKR()
assert aakr.metric == 'euclidean'
assert aakr.bw == 1
assert not aakr.modified
assert aakr.penalty is None
assert aakr.n_jobs == -1

aakr.fit(X)
Expand All @@ -44,3 +50,27 @@ def test_aakr_partial_fit_input_shape_mismatch(data):

with pytest.raises(ValueError, match='Shape of input is different'):
aakr.partial_fit(X[:, :-1])


def test_aakr_modified(data):
    """Exercise the modified AAKR with and without a penalty vector."""
    X = data[0]
    n_features = X.shape[1]
    X_head = X[:3]

    # Modified AAKR: first without a penalty, then with an explicit one.
    # Rows already present in the fitted data should be reconstructed
    # within the given tolerance.
    for penalty in (None, [1] * n_features):
        aakr = AAKR(modified=True, penalty=penalty)
        X_nc = aakr.fit(X).transform(X_head)
        assert hasattr(aakr, 'X_')
        assert_allclose(X_nc, X_head, atol=1.)

    # Penalty vector whose length does not match the input data
    too_short = [1] * (n_features - 1)
    with pytest.raises(ValueError, match='Shape of input is different from'):
        AAKR(modified=True, penalty=too_short).fit(X)

    # Penalty vector given although the modified version is disabled
    with pytest.raises(ValueError, match='Parameter `penalty` given, but'):
        AAKR(modified=False, penalty=[1] * n_features).fit(X)

0 comments on commit 5fa7c93

Please sign in to comment.