Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Smoothing in several dimensions #436

Merged
merged 13 commits into from
Oct 10, 2023
Merged
104 changes: 60 additions & 44 deletions skfda/misc/hat_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,20 +11,21 @@

import abc
import math
from typing import Callable, TypeVar, Union, overload
from typing import Callable, Final, TypeVar, Union, overload

import numpy as np

from .._utils._sklearn_adapter import BaseEstimator
from ..representation._functional_data import FData
from ..representation.basis import FDataBasis
from ..typing._base import GridPointsLike
from ..typing._numpy import NDArrayFloat
from . import kernels

Input = TypeVar("Input", bound=Union[FData, GridPointsLike])
Input = TypeVar("Input", bound=Union[FData, NDArrayFloat])
Prediction = TypeVar("Prediction", bound=Union[NDArrayFloat, FData])

DEFAULT_BANDWIDTH_PERCENTILE: Final = 15


class HatMatrix(
BaseEstimator,
Expand Down Expand Up @@ -52,8 +53,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -65,8 +66,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: Prediction | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -77,8 +78,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: Input | None = None,
X: Input | None = None,
X_train: Input,
X: Input,
y_train: NDArrayFloat | FData | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand Down Expand Up @@ -186,7 +187,7 @@ def _hat_matrix_function_not_normalized(
) -> NDArrayFloat:

bandwidth = (
np.percentile(np.abs(delta_x), 15)
float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
if self.bandwidth is None
else self.bandwidth
)
Expand Down Expand Up @@ -278,8 +279,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -291,8 +292,8 @@ def __call__(
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: Prediction | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
Expand All @@ -303,19 +304,37 @@ def __call__( # noqa: D102
self,
*,
delta_x: NDArrayFloat,
X_train: FData | GridPointsLike | None = None,
X: FData | GridPointsLike | None = None,
X_train: FData | NDArrayFloat,
X: FData | NDArrayFloat,
y_train: NDArrayFloat | FData | None = None,
weights: NDArrayFloat | None = None,
_cv: bool = False,
) -> NDArrayFloat | FData:

bandwidth = (
np.percentile(np.abs(delta_x), 15)
float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE))
if self.bandwidth is None
else self.bandwidth
)

# Smoothing for functions of one variable
if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1:
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
delta_x = np.subtract.outer(
X.flatten(),
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
X_train.flatten(),
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
)

assert y_train is None

return super().__call__( # noqa: WPS503
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
delta_x=delta_x,
X_train=X_train,
X=X,
y_train=y_train,
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
weights=weights,
_cv=_cv,
)

# Regression
if isinstance(X_train, FData):
assert isinstance(X, FData)
Expand All @@ -326,44 +345,41 @@ def __call__( # noqa: D102
):
raise ValueError("Only FDataBasis is supported for now.")

m1 = X_train.coefficients
m2 = X.coefficients
vnmabus marked this conversation as resolved.
Show resolved Hide resolved

if y_train is None:
y_train = np.identity(X_train.n_samples)

m1 = X_train.coefficients
m2 = X.coefficients
else:
m1 = X_train
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
m2 = X
vnmabus marked this conversation as resolved.
Show resolved Hide resolved

if y_train is None:
y_train = np.identity(X_train.shape[0])
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
vnmabus marked this conversation as resolved.
Show resolved Hide resolved

# Subtract previous matrices obtaining a 3D matrix
# The i-th element contains the matrix X_train - X[i]
C = m1 - m2[:, np.newaxis]
# Subtract previous matrices obtaining a 3D matrix
# The i-th element contains the matrix X_train - X[i]
C = m1 - m2[:, np.newaxis]

# Inner product matrix only is applicable in regression
if isinstance(X_train, FDataBasis):
inner_product_matrix = X_train.basis.inner_product_matrix()

# Calculate new coefficients taking into account cross-products
# if the basis is orthonormal, C would not change
C = C @ inner_product_matrix # noqa: WPS350

# Adding a column of ones in the first position of all matrices
dims = (C.shape[0], C.shape[1], 1)
C = np.concatenate((np.ones(dims), C), axis=-1)

return self._solve_least_squares(
delta_x=delta_x,
coefs=C,
y_train=y_train,
bandwidth=bandwidth,
)

# Smoothing
else:
# Adding a column of ones in the first position of all matrices
dims = (C.shape[0], C.shape[1], 1)
C = np.concatenate((np.ones(dims), C), axis=-1)

return super().__call__( # type: ignore[misc, type-var] # noqa: WPS503
delta_x=delta_x,
X_train=X_train,
X=X,
y_train=y_train, # type: ignore[arg-type]
weights=weights,
_cv=_cv,
)
return self._solve_least_squares(
delta_x=delta_x,
coefs=C,
y_train=y_train,
bandwidth=bandwidth,
vnmabus marked this conversation as resolved.
Show resolved Hide resolved
)

def _solve_least_squares(
self,
Expand Down Expand Up @@ -485,7 +501,7 @@ def _hat_matrix_function_not_normalized(
# For each row in the distances matrix, it calculates the furthest
# point within the k nearest neighbours
vec = np.sort(
np.abs(delta_x),
delta_x,
axis=1,
)[:, n_neighbors - 1] + tol

Expand Down
16 changes: 10 additions & 6 deletions skfda/preprocessing/smoothing/_kernel_smoothers.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,11 @@

import numpy as np

from ..._utils._utils import _to_grid_points
from ..._utils._utils import _cartesian_product, _to_grid_points
from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix
from ...misc.metrics import PairwiseMetric, l2_distance
from ...typing._base import GridPointsLike
from ...typing._metric import Metric
from ...typing._numpy import NDArrayFloat
from ._linear import _LinearSmoother

Expand Down Expand Up @@ -114,10 +116,12 @@ def __init__(
*,
weights: Optional[NDArrayFloat] = None,
output_points: Optional[GridPointsLike] = None,
metric: Metric[NDArrayFloat] = l2_distance,
):
self.kernel_estimator = kernel_estimator
self.weights = weights
self.output_points = output_points
self.metric = metric
self._cv = False # For testing purposes only

def _hat_matrix(
Expand All @@ -126,18 +130,18 @@ def _hat_matrix(
output_points: GridPointsLike,
) -> NDArrayFloat:

input_points = _to_grid_points(input_points)
output_points = _to_grid_points(output_points)
input_points = _cartesian_product(_to_grid_points(input_points))
output_points = _cartesian_product(_to_grid_points(output_points))

if self.kernel_estimator is None:
self.kernel_estimator = NadarayaWatsonHatMatrix()

delta_x = np.subtract.outer(output_points[0], input_points[0])
delta_x = PairwiseMetric(self.metric)(output_points, input_points)
vnmabus marked this conversation as resolved.
Show resolved Hide resolved

return self.kernel_estimator(
delta_x=delta_x,
weights=self.weights,
X_train=input_points,
X=output_points,
X_train=np.array(input_points),
X=np.array(output_points),
_cv=self._cv,
)
19 changes: 18 additions & 1 deletion skfda/preprocessing/smoothing/_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class _LinearSmoother(
``hat_matrix`` to define the smoothing or 'hat' matrix.

"""

input_points_: GridPoints
output_points_: GridPoints

Expand Down Expand Up @@ -116,9 +117,25 @@ def transform(
)
)

dims = [len(e) for e in self.output_points_]
dims += [len(e) for e in self.input_points_]

hat_matrix = np.reshape(
self.hat_matrix_,
dims,
)

data_matrix = np.einsum(
hat_matrix,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚫 [mypy] reported by reviewdog 🐶
Argument 1 to "einsum" has incompatible type "ndarray[Any, dtype[floating[Any]]]"; expected "str | SupportsArray[dtype[bool | integer[Any]]] | _NestedSequence[SupportsArray[dtype[bool | integer[Any]]]] | bool | int | _NestedSequence[bool | int]" [arg-type]

[Ellipsis, *range(1, len(self.output_points_) + 1)],
X.data_matrix,

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚫 [mypy] reported by reviewdog 🐶
Argument 3 to "einsum" has incompatible type "ndarray[Any, dtype[floating[Any]]]"; expected "SupportsArray[dtype[bool]] | _NestedSequence[SupportsArray[dtype[bool]]] | bool | _NestedSequence[bool]" [arg-type]

[0, *range(1, len(self.output_points_) + 2)],
[0, Ellipsis, len(self.output_points_) + 1],

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🚫 [mypy] reported by reviewdog 🐶
Argument 5 to "einsum" has incompatible type "list[object]"; expected "SupportsArray[dtype[bool]] | _NestedSequence[SupportsArray[dtype[bool]]] | bool | _NestedSequence[bool]" [arg-type]

)

# The matrix is cached
return X.copy(
data_matrix=self.hat_matrix_ @ X.data_matrix,
data_matrix=data_matrix,
grid_points=self.output_points_,
)

Expand Down
34 changes: 34 additions & 0 deletions skfda/tests/test_smoothing.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import sklearn
from sklearn.datasets import load_digits
from typing_extensions import Literal

import skfda
Expand Down Expand Up @@ -173,6 +174,39 @@ def test_knn(self) -> None:
np.testing.assert_allclose(hat_matrix, hat_matrix_r)


class TestSurfaceSmoother(unittest.TestCase):
"""Test that smoothing works on surfaces."""

def test_knn(self) -> None:
"""Check 2D smoothing using digits dataset."""
X, y = load_digits(return_X_y=True)
X = X.reshape(-1, 8, 8)

fd = skfda.FDataGrid(X)

ks = KernelSmoother(
kernel_estimator=KNeighborsHatMatrix(n_neighbors=1))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pep8] reported by reviewdog 🐶
WPS319 Found bracket in wrong position

fd_trans = ks.fit_transform(fd)

np.testing.assert_allclose(fd_trans.data_matrix, fd.data_matrix)

ks = KernelSmoother(
kernel_estimator=KNeighborsHatMatrix(n_neighbors=3))

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pep8] reported by reviewdog 🐶
WPS319 Found bracket in wrong position

fd_trans = ks.fit_transform(fd)

res = [
[0, 1.25, 7.75, 10.5, 8.25, 6.25, 1.5, 0],
[0, 3.2, 9.6, 10.6, 9.8, 8.4, 5.6, 1.25],
[0.75, 4.4, 9, 6.4, 4.6, 8.4, 6.4, 2],
[1, 4.8, 7.8, 2.8, 1.6, 7.2, 6.4, 2],
[1.25, 4.2, 7.2, 1.6, 2, 7.4, 6.4, 2],
[1, 4.4, 7.4, 3.4, 4.6, 8.2, 5.4, 1.75],
[0.5, 4, 7.6, 8.4, 7.6, 6.8, 3.8, 0],
[0, 2, 8.25, 8.5, 8.25, 5.5, 0, 0],
]
np.testing.assert_allclose(fd_trans.data_matrix[0, ..., 0], res)


class TestBasisSmoother(unittest.TestCase):
"""Test Basis Smoother."""

Expand Down