Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature Construction Moments #438

Merged
merged 13 commits into from
Jun 14, 2022
3 changes: 3 additions & 0 deletions skfda/exploratory/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
local_averages,
number_up_crossings,
occupation_measure,
unconditional_central_moments,
unconditional_expected_value,
unconditional_moments,
)
from ._stats import (
cov,
Expand Down
174 changes: 171 additions & 3 deletions skfda/exploratory/stats/_functional_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

from __future__ import annotations

from typing import Optional, Sequence, Tuple, Union
from typing import Callable, Optional, Sequence, Tuple, Union

import numpy as np

from ..._utils import check_is_univariate
from ...representation import FDataBasis, FDataGrid
from ..._utils import check_is_univariate, nquad_vec
from ...representation import FData, FDataBasis, FDataGrid
from ...representation._typing import NDArrayFloat, NDArrayInt


Expand Down Expand Up @@ -342,3 +342,171 @@ def number_up_crossings(
points_greater & points_smaller_rotated,
axis=2,
).T


def unconditional_central_moments(
    data: FDataGrid,
    n: int,
) -> NDArrayFloat:
    r"""
    Calculate the unconditional central moments of a dataset.

    The unconditional central moments are defined as the unconditional
    moments where the mean is subtracted from each sample before the
    integration. The n-th unconditional central moment is calculated as
    follows, where p is the number of observations:

    .. math::
        f_1(x(t))=\frac{1}{\left(b-a\right)}\int_a^b
        \left(x_1(t) - \mu_1\right)^n dt, \dots,
        f_p(x(t))=\frac{1}{\left(b-a\right)}\int_a^b
        \left(x_p(t) - \mu_p\right)^n dt

    where :math:`\mu_i` is the integral of the i-th sample divided by the
    measure of the domain.

    Args:
        data: FDataGrid where we want to calculate
            a particular unconditional central moment.
        n: order of the moment.

    Returns:
        ndarray of shape (n_samples, n_dimensions) with the values of the
        specified moment.

    Example:

        We will calculate the first unconditional central moment of the
        Canadian Weather data set. In order to simplify the example, we
        will use only the first five samples.
        First we proceed to import the data set.

        >>> from skfda.datasets import fetch_weather
        >>> X = fetch_weather(return_X_y=True)[0]

        Then we call the function with the samples that we want to
        consider and the specified moment order.

        >>> import numpy as np
        >>> from skfda.exploratory.stats import unconditional_central_moments
        >>> np.around(unconditional_central_moments(X[:5], 1), decimals=2)
        array([[ 0.01, 0.01],
               [ 0.02, 0.01],
               [ 0.02, 0.01],
               [ 0.02, 0.01],
               [ 0.01, 0.01]])
    """
    # Normalise by the measure of the whole domain (product of every
    # interval length), the same normalisation that
    # unconditional_expected_value applies. For a one-dimensional domain
    # this is just b - a.
    # NOTE(review): assumes ``integrate()`` integrates over every domain
    # dimension -- confirm.
    domain_measure = np.prod(
        [
            (interval[1] - interval[0])
            for interval in data.domain_range
        ],
    )
    mean = data.integrate() / domain_measure

    return unconditional_expected_value(
        data,
        lambda x: np.power(x - mean, n),
    )


def unconditional_moments(
    data: Union[FDataBasis, FDataGrid],
    n: int,
) -> NDArrayFloat:
    r"""
    Calculate the specified unconditional moment of a dataset.

    The n-th unconditional moments of p real-valued continuous functions
    are calculated as:

    .. math::
        f_1(x(t))=\frac{1}{\left( b-a\right)}\int_a^b
        \left(x_1(t)\right)^n dt, \dots,
        f_p(x(t))=\frac{1}{\left( b-a\right)}\int_a^b
        \left(x_p(t)\right)^n dt

    Args:
        data: FDataGrid or FDataBasis where we want to calculate
            a particular unconditional moment.
        n: order of the moment.

    Returns:
        ndarray of shape (n_samples, n_dimensions) with the values of the
        specified moment.

    Example:

        We will calculate the first unconditional moment of the Canadian
        Weather data set. In order to simplify the example, we will use
        only the first five samples.
        First we proceed to import the data set.

        >>> from skfda.datasets import fetch_weather
        >>> X = fetch_weather(return_X_y=True)[0]

        Then we call the function with the samples that we want to
        consider and the specified moment order.

        >>> import numpy as np
        >>> from skfda.exploratory.stats import unconditional_moments
        >>> np.around(unconditional_moments(X[:5], 1), decimals=2)
        array([[ 4.7 , 4.03],
               [ 6.16, 3.96],
               [ 5.52, 4.01],
               [ 6.82, 3.44],
               [ 5.25, 3.29]])
    """
    # The moment of order n is the expected value of the n-th power of
    # the sample values, so the work is delegated to the generic routine.
    raise_to_order = lambda values: np.power(values, n)  # noqa: E731

    return unconditional_expected_value(data, raise_to_order)


def unconditional_expected_value(
    data: FData,
    function: Callable[[np.ndarray], np.ndarray],
) -> NDArrayFloat:
    r"""
    Calculate the unconditional expected value of a function.

    For a given transformation :math:`g(x(t))` and p observations, the
    unconditional expected values are calculated as:

    .. math::
        f_1(x(t))=\frac{1}{\left( b-a\right)}\int_a^b g
        \left(x_1(t)\right)dt,\dots,
        f_p(x(t))=\frac{1}{\left( b-a\right)}\int_a^b g
        \left(x_p(t)\right) dt

    Args:
        data: FDataGrid or FDataBasis where we want to calculate
            the expected value.
        function: function that specifies how the expected value is
            calculated. It has to be a function of X(t).

    Returns:
        ndarray of shape (n_samples, n_dimensions) with the values of the
        expectations.

    Example:

        We will use this function to calculate the logarithmic first
        moment of the first 5 samples of the Berkeley Growth dataset.
        We will start by importing it.

        >>> from skfda.datasets import fetch_growth
        >>> X = fetch_growth(return_X_y=True)[0]

        We will define a function that calculates the logarithmic first
        moment.

        >>> import numpy as np
        >>> f = lambda x: np.power(np.log(x), 1)

        Then we call the function with the dataset and the function.

        >>> from skfda.exploratory.stats import unconditional_expected_value
        >>> np.around(unconditional_expected_value(X[:5], f), decimals=2)
        array([[ 4.96],
               [ 4.88],
               [ 4.85],
               [ 4.9 ],
               [ 4.84]])
    """
    # Measure of the domain: product of the lengths of all its intervals.
    interval_lengths = [
        (interval[1] - interval[0])
        for interval in data.domain_range
    ]
    domain_measure = np.prod(interval_lengths)

    if not isinstance(data, FDataGrid):

        def transformed_values(  # noqa: WPS430
            *args: NDArrayFloat,
        ) -> NDArrayFloat:
            # Evaluate the data at the requested point and keep index 0
            # of the second axis (presumably the single evaluation
            # point), then apply the transformation to those values.
            values_at_point = data(args)[:, 0, :]
            return function(values_at_point)

        # NOTE(review): a reviewer asked to confirm that this branch
        # integrates the function applied to the sample values, and that
        # it agrees with the FDataGrid branch for the same data.
        integral = nquad_vec(transformed_values, data.domain_range)
        return integral / domain_measure

    # Discretized data: transform the whole object, then integrate it.
    return function(data).integrate() / domain_measure
11 changes: 11 additions & 0 deletions skfda/representation/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,17 @@ def _get_op_matrix(
* (self.data_matrix.ndim - 1)
)

return other[other_index]
elif other.shape == (
self.n_samples,
self.dim_codomain,
):
other_index = (
(slice(None),) + (np.newaxis,)
* (self.data_matrix.ndim - 2)
+ (slice(None),)
)

return other[other_index]

elif isinstance(other, FDataGrid):
Expand Down
31 changes: 31 additions & 0 deletions tests/test_functional_transformers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
"""Test to check the per functional transformers."""

import unittest

import numpy as np

import skfda.representation.basis as basis
from skfda.datasets import fetch_growth
from skfda.exploratory.stats import unconditional_expected_value


class TestUncondExpectedVals(unittest.TestCase):
    """Tests for unconditional expected values method."""

    def test_transform(self) -> None:
        """Check grid and basis forms of the same data give close values."""
        growth_curves = fetch_growth(return_X_y=True)[0]
        first_samples = growth_curves[:5]

        # Expected value of the logarithm, computed on the discretized
        # samples and on their B-spline representation.
        expected_from_grid = unconditional_expected_value(
            first_samples,
            np.log,
        )
        bspline_samples = first_samples.to_basis(basis.BSpline(n_basis=7))
        expected_from_basis = unconditional_expected_value(
            bspline_samples,
            np.log,
        )

        np.testing.assert_allclose(
            expected_from_basis,
            expected_from_grid,
            rtol=1e-3,
        )


# Run the test suite when this module is executed directly as a script.
if __name__ == '__main__':
    unittest.main()