Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature Construction Moments #438

Merged
merged 13 commits into from
Jun 14, 2022
3 changes: 3 additions & 0 deletions skfda/exploratory/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
local_averages,
number_up_crossings,
occupation_measure,
unconditional_central_moments,
unconditional_expected_value,
unconditional_moments,
)
from ._stats import (
cov,
Expand Down
155 changes: 153 additions & 2 deletions skfda/exploratory/stats/_functional_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@

from __future__ import annotations

from typing import Optional, Sequence, Tuple, Union
from typing import Callable, Optional, Sequence, Tuple, Union

import numpy as np

from ..._utils import check_is_univariate
from ..._utils import check_is_univariate, nquad_vec
from ...representation import FDataBasis, FDataGrid
from ...representation._typing import NDArrayFloat, NDArrayInt

Expand Down Expand Up @@ -342,3 +342,154 @@ def number_up_crossings(
points_greater & points_smaller_rotated,
axis=2,
).T


def unconditional_central_moments(
    data: FDataGrid,
    n: int,
) -> NDArrayFloat:
    r"""
    Calculate the unconditional central moments of a dataset.

    The unconditional central moments are defined as the unconditional
    moments where the mean is subtracted from each sample before the
    integration. For every sample :math:`X_i` and every coordinate
    :math:`j` of its codomain, the central moment of order :math:`n` is

    .. math::
        \frac{1}{|\mathcal{T}|}\int_{\mathcal{T}}
        \left(X_{i,j}(t) - \mu_{i,j}\right)^n dt,
        \qquad
        \mu_{i,j} = \frac{1}{|\mathcal{T}|}
        \int_{\mathcal{T}} X_{i,j}(t) dt,

    where :math:`\mathcal{T}` is the domain of the data and
    :math:`|\mathcal{T}|` is its measure (:math:`b - a` for a domain
    :math:`[a, b]`).

    Args:
        data: FDataGrid where we want to calculate
            a particular unconditional central moment.
        n: order of the moment.

    Returns:
        ndarray with the values of the specified moment. As the example
        below shows, it has one row per sample and one column per
        dimension of the codomain.

    Example:
        We will calculate the first unconditional central moment of the
        Canadian Weather data set. In order to simplify the example, we
        will use only the first five samples.
        First we proceed to import the data set.

        >>> from skfda.datasets import fetch_weather
        >>> X = fetch_weather(return_X_y=True)[0]

        Then we call the function with the samples that we want to
        consider and the specified moment order.

        >>> import numpy as np
        >>> from skfda.exploratory.stats import unconditional_central_moments
        >>> np.around(unconditional_central_moments(X[:5], 1), decimals=2)
        array([[ 0.01, 0.01],
               [ 0.02, 0.01],
               [ 0.02, 0.01],
               [ 0.02, 0.01],
               [ 0.01, 0.01]])
    """
    # Measure of the whole domain: product of the lengths of every domain
    # interval. This is the same normalisation that
    # unconditional_expected_value applies, so both stay consistent for
    # domains with more than one dimension (for a single interval it is
    # just b - a, as before).
    domain_measure = np.prod(
        [
            (interval[1] - interval[0])
            for interval in data.domain_range
        ],
    )

    # Average value of every sample over the domain.
    mean = data.integrate() / domain_measure

    return unconditional_expected_value(
        data,
        lambda x: np.power(x - mean, n),
    )


def unconditional_moments(
    data: Union[FDataBasis, FDataGrid],
    n: int,
) -> NDArrayFloat:
    r"""
    Calculate the specified unconditional moment of a dataset.

    The moment of order :math:`n` is the unconditional expected value of
    the :math:`n`-th power of the data. For every sample :math:`X_i` and
    every coordinate :math:`j` of its codomain it is

    .. math::
        \frac{1}{|\mathcal{T}|}\int_{\mathcal{T}} X_{i,j}(t)^n dt,

    where :math:`\mathcal{T}` is the domain of the data and
    :math:`|\mathcal{T}|` is its measure (:math:`b - a` for a domain
    :math:`[a, b]`).

    Args:
        data: FDataGrid or FDataBasis where we want to calculate
            a particular unconditional moment.
        n: order of the moment.

    Returns:
        ndarray with the values of the specified moment. As the example
        below shows, it has one row per sample and one column per
        dimension of the codomain.

    Example:
        We will calculate the first unconditional moment of the Canadian
        Weather data set. In order to simplify the example, we will use
        only the first five samples.
        First we proceed to import the data set.

        >>> from skfda.datasets import fetch_weather
        >>> X = fetch_weather(return_X_y=True)[0]

        Then we call the function with the samples that we want to
        consider and the specified moment order.

        >>> import numpy as np
        >>> from skfda.exploratory.stats import unconditional_moments
        >>> np.around(unconditional_moments(X[:5], 1), decimals=2)
        array([[ 4.7 , 4.03],
               [ 6.16, 3.96],
               [ 5.52, 4.01],
               [ 6.82, 3.44],
               [ 5.25, 3.29]])
    """
    def _nth_power(x):
        # Raise the observed values to the requested moment order.
        return np.power(x, n)

    return unconditional_expected_value(data, _nth_power)


def unconditional_expected_value(
    # NOTE(review): a reviewer suggested annotating ``data`` as ``FData``
    # instead of this Union; ``FData`` is not imported in the visible part
    # of the file, so the Union is kept here.
    data: Union[FDataBasis, FDataGrid],
    function: Callable[[np.ndarray], np.ndarray],
) -> NDArrayFloat:
    r"""
    Calculate the unconditional expected value of a function.

    For every sample :math:`X_i` the result is

    .. math::
        \frac{1}{|\mathcal{T}|}\int_{\mathcal{T}} g(X_i(t)) dt,

    where :math:`g` is ``function``, :math:`\mathcal{T}` is the domain of
    the data and :math:`|\mathcal{T}|` is its measure (the product of the
    lengths of the intervals in ``data.domain_range``).

    Args:
        data: FDataGrid or FDataBasis where we want to calculate
            the expected value.
        function: function that specifies how the expected value is
            calculated. It has to be a function of X(t). For FDataGrid
            input it is applied to the FDataGrid object itself, so its
            result must provide ``integrate()``; otherwise it is applied
            to the values of the data at each integration point.

    Returns:
        ndarray with the values of the expectations. As the example below
        shows, it has one row per sample and one column per dimension of
        the codomain.

    Example:
        We will use this function to calculate the logarithmic first
        moment of the first 5 samples of the Berkeley Growth dataset.
        We will start by importing it.

        >>> from skfda.datasets import fetch_growth
        >>> X = fetch_growth(return_X_y=True)[0]

        We will define a function that calculates the logarithmic first
        moment.

        >>> import numpy as np
        >>> f = lambda x: np.power(np.log(x), 1)

        Then we call the function with the dataset and the function.

        >>> from skfda.exploratory.stats import unconditional_expected_value
        >>> np.around(unconditional_expected_value(X[:5], f), decimals=2)
        array([[ 4.96],
               [ 4.88],
               [ 4.85],
               [ 4.9 ],
               [ 4.84]])
    """
    # Measure of the whole domain: product of the interval lengths.
    domain_measure = np.prod(
        [
            (interval[1] - interval[0])
            for interval in data.domain_range
        ],
    )

    if isinstance(data, FDataGrid):
        return function(data).integrate() / domain_measure

    def _integrand(*args):
        # Apply ``function`` to the data evaluated at one domain point,
        # so that g(X(t)) is integrated rather than g itself.
        # NOTE(review): assumes nquad_vec passes one coordinate per domain
        # dimension and that evaluating ``data`` at a single point yields
        # an array of shape (n_samples, 1, dim_codomain) - confirm against
        # skfda._utils.nquad_vec and FData evaluation.
        values = data(args)[:, 0, :]
        return function(values)

    # The integration limits are the domain of the data. The previous code
    # passed ``data.integrate()`` (integral values, not ranges) here.
    return nquad_vec(
        _integrand,
        data.domain_range,
    ) / domain_measure
11 changes: 11 additions & 0 deletions skfda/representation/grid.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,17 @@ def _get_op_matrix(
* (self.data_matrix.ndim - 1)
)

return other[other_index]
elif other.shape == (
self.n_samples,
self.dim_codomain,
):
other_index = (
(slice(None),) + (np.newaxis,)
* (self.data_matrix.ndim - 2)
+ (slice(None),)
)

return other[other_index]

elif isinstance(other, FDataGrid):
Expand Down