Commit

Merge branch 'develop' into feature/classificationExample

vnmabus authored Jun 13, 2022
2 parents 8b0049e + 4541b91 commit f72c9a4
Showing 5 changed files with 114 additions and 83 deletions.
1 change: 1 addition & 0 deletions binder/requirements.txt
@@ -1,3 +1,4 @@
-r ../readthedocs-requirements.txt
jupytext
sphinx-gallery<=0.7.0
.
1 change: 1 addition & 0 deletions docs/modules/preprocessing.rst
@@ -13,6 +13,7 @@ this category deal with this problem.
preprocessing/smoothing
preprocessing/registration
preprocessing/dim_reduction
preprocessing/feature_construction

Smoothing
---------
35 changes: 28 additions & 7 deletions skfda/datasets/_real_datasets.py
@@ -146,7 +146,32 @@ def _ucr_to_fdatagrid(name: str, data: np.ndarray) -> FDataGrid:
return FDataGrid(data, grid_points=grid_points, dataset_name=name)


def fetch_ucr(name: str, **kwargs: Any) -> Bunch:
@overload
def fetch_ucr(
    name: str,
    *,
    return_X_y: Literal[False] = False,
    **kwargs: Any,
) -> Bunch:
    pass


@overload
def fetch_ucr(
    name: str,
    *,
    return_X_y: Literal[True],
    **kwargs: Any,
) -> Tuple[FDataGrid, ndarray]:
    pass


def fetch_ucr(
    name: str,
    *,
    return_X_y: bool = False,
    **kwargs: Any,
) -> Union[Bunch, Tuple[FDataGrid, ndarray]]:
    """
    Fetch a dataset from the UCR.
@@ -180,12 +205,8 @@ def fetch_ucr(name: str, **kwargs: Any) -> Bunch:
    )
    dataset.pop('feature_names')

    data_test = dataset.get('data_test', None)
    if data_test is not None:
        dataset['data_test'] = _ucr_to_fdatagrid(
            name=dataset['name'],
            data=data_test,
        )
    if return_X_y:
        return dataset['data'], dataset['target']

    return dataset
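
The snippet below is an editorial illustration, not part of the commit: a minimal
sketch of how the new return_X_y flag could be called. The dataset name
'GunPoint' is only an example UCR identifier; thanks to the Literal overloads
above, a static type checker can infer the narrowed return type of each call.

# Illustrative sketch (assumed usage, not from the diff).
from skfda.datasets import fetch_ucr

# Default behaviour: a Bunch holding the data and its metadata.
dataset = fetch_ucr('GunPoint')
X, y = dataset['data'], dataset['target']

# New behaviour added here: get the (FDataGrid, ndarray) pair directly.
X, y = fetch_ucr('GunPoint', return_X_y=True)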

86 changes: 45 additions & 41 deletions skfda/exploratory/stats/_functional_transformers.py
@@ -37,17 +37,18 @@ def local_averages(
ndarray of shape (n_samples, n_intervals, n_dimensions) with
the transformed data.
Example:
Examples:
We import the Berkeley Growth Study dataset.
We will use only the first 3 samples to make the
example easy.
example easy
>>> from skfda.datasets import fetch_growth
>>> dataset = fetch_growth(return_X_y=True)[0]
>>> X = dataset[:3]
Then we decide how many intervals we want to consider (in our case 2)
and call the function with the dataset.
>>> import numpy as np
>>> from skfda.exploratory.stats import local_averages
>>> np.around(local_averages(X, 2), decimals=2)
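
The doctest output above is cut off in this view. As an editorial illustration
only (not part of the commit), the documented return shape of local_averages
can be inspected with a snippet like the following:

# Illustrative sketch (assumed usage, not from the diff).
from skfda.datasets import fetch_growth
from skfda.exploratory.stats import local_averages

X = fetch_growth(return_X_y=True)[0][:3]  # first 3 growth curves, as in the docstring
averages = local_averages(X, 2)           # split the domain into 2 intervals
print(averages.shape)  # the docstring documents (n_samples, n_intervals, n_dimensions)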
@@ -151,9 +152,10 @@ def occupation_measure(
ndarray of shape (n_samples, n_intervals)
with the transformed data.
Example:
Examples:
We will create the FDataGrid that we will use to extract
the occupation measure
the occupation measure.
>>> from skfda.representation import FDataGrid
>>> import numpy as np
>>> t = np.linspace(0, 10, 100)
@@ -171,6 +173,7 @@
and (2.0, 3.0). We also need to specify the number of points
that the function will use to interpolate.
We are going to use 501 points.
>>> from skfda.exploratory.stats import occupation_measure
>>> np.around(
... occupation_measure(
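
The rest of this call is truncated in the diff view. Purely as an editorial
illustration (not part of the commit), a complete call matching the docstring's
description might look as follows, assuming the signature
occupation_measure(data, intervals, *, n_points) and using a simple sine curve
as stand-in data:

# Illustrative sketch (assumed usage, not from the diff).
import numpy as np
from skfda.representation import FDataGrid
from skfda.exploratory.stats import occupation_measure

t = np.linspace(0, 10, 100)
fd_grid = FDataGrid(data_matrix=[np.sin(t)], grid_points=t)  # one example curve

measures = occupation_measure(
    fd_grid,
    [(0.0, 1.0), (2.0, 3.0)],  # the two intervals mentioned in the docstring
    n_points=501,              # interpolation points, as in the docstring text
)
print(measures.shape)  # the docstring documents shape (n_samples, n_intervals)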
@@ -238,42 +241,43 @@ def number_up_crossings(
ndarray of shape (n_samples, len(levels))\
with the values of the counters.
Example:
For this example we will use a well known function so the correct
functioning of this method can be checked.
We will create and use an FDataGrid with a sample extracted from
the Bessel function of the first kind and order 0.
First of all we import the Bessel function and create the X axis
data grid. Then we create the FDataGrid.
>>> from skfda.exploratory.stats import number_up_crossings
>>> from scipy.special import jv
>>> import numpy as np
>>> x_grid = np.linspace(0, 14, 14)
>>> fd_grid = FDataGrid(
... data_matrix=[jv([0], x_grid)],
... grid_points=x_grid,
... )
>>> fd_grid.data_matrix
array([[[ 1. ],
[ 0.73041066],
[ 0.13616752],
[-0.32803875],
[-0.35967936],
[-0.04652559],
[ 0.25396879],
[ 0.26095573],
[ 0.01042895],
[-0.22089135],
[-0.2074856 ],
[ 0.0126612 ],
[ 0.20089319],
[ 0.17107348]]])
Finally we evaluate the number of up crossings method with the FDataGrid
created.
>>> number_up_crossings(fd_grid, np.asarray([0]))
array([[2]])
Examples:
For this example we will use a well known function so the correct
functioning of this method can be checked.
We will create and use an FDataGrid with a sample extracted from
the Bessel function of the first kind and order 0.
First of all we import the Bessel function and create the X axis
data grid. Then we create the FDataGrid.
>>> from skfda.exploratory.stats import number_up_crossings
>>> from scipy.special import jv
>>> import numpy as np
>>> x_grid = np.linspace(0, 14, 14)
>>> fd_grid = FDataGrid(
... data_matrix=[jv([0], x_grid)],
... grid_points=x_grid,
... )
>>> fd_grid.data_matrix
array([[[ 1. ],
[ 0.73041066],
[ 0.13616752],
[-0.32803875],
[-0.35967936],
[-0.04652559],
[ 0.25396879],
[ 0.26095573],
[ 0.01042895],
[-0.22089135],
[-0.2074856 ],
[ 0.0126612 ],
[ 0.20089319],
[ 0.17107348]]])
Finally we evaluate the number of up crossings method with the
FDataGrid created.
>>> number_up_crossings(fd_grid, np.asarray([0]))
array([[2]])
"""
curves = data.data_matrix[:, :, 0]

74 changes: 39 additions & 35 deletions skfda/preprocessing/feature_construction/_fda_feature_union.py
@@ -47,41 +47,45 @@ class FDAFeatureUnion(FeatureUnion): # type: ignore
output. By default the value is False.
Examples:
Firstly we will import the Berkeley Growth Study data set
>>> from skfda.datasets import fetch_growth
>>> X,y = fetch_growth(return_X_y=True)
Then we need to import the transformers we want to use. In our case we
will use the Recursive Maxima Hunting method to select important features.
We will concatenate the original curves, obtained with an
EvaluationTransformer, to the results of the previous method.
>>> from skfda.preprocessing.feature_construction import (
... FDAFeatureUnion,
... )
>>> from skfda.preprocessing.dim_reduction.variable_selection import (
... RecursiveMaximaHunting,
... )
>>> from skfda.preprocessing.feature_construction import (
... EvaluationTransformer,
... )
>>> import numpy as np
Finally we apply fit and transform.
>>> union = FDAFeatureUnion(
... [
... ("rmh", RecursiveMaximaHunting()),
... ("eval", EvaluationTransformer()),
... ],
... array_output=True,
... )
>>> np.around(union.fit_transform(X,y), decimals=2)
array([[ 195.1, 141.1, 163.8, ..., 193.8, 194.3, 195.1],
[ 178.7, 133. , 148.1, ..., 176.1, 177.4, 178.7],
[ 171.5, 126.5, 143.6, ..., 170.9, 171.2, 171.5],
...,
[ 166.8, 132.8, 152.2, ..., 166. , 166.3, 166.8],
[ 168.6, 139.4, 161.6, ..., 168.3, 168.4, 168.6],
[ 169.2, 138.1, 161.7, ..., 168.6, 168.9, 169.2]])
Firstly we will import the Berkeley Growth Study data set:
>>> from skfda.datasets import fetch_growth
>>> X,y = fetch_growth(return_X_y=True)
Then we need to import the transformers we want to use. In our case we
will use the Recursive Maxima Hunting method to select important
features.
We will concatenate the original curves, obtained with an
EvaluationTransformer, to the results of the previous method.
>>> from skfda.preprocessing.feature_construction import (
... FDAFeatureUnion,
... )
>>> from skfda.preprocessing.dim_reduction.variable_selection import (
... RecursiveMaximaHunting,
... )
>>> from skfda.preprocessing.feature_construction import (
... EvaluationTransformer,
... )
>>> import numpy as np
Finally we apply fit and transform.
>>> union = FDAFeatureUnion(
... [
... ("rmh", RecursiveMaximaHunting()),
... ("eval", EvaluationTransformer()),
... ],
... array_output=True,
... )
>>> np.around(union.fit_transform(X,y), decimals=2)
array([[ 195.1, 141.1, 163.8, ..., 193.8, 194.3, 195.1],
[ 178.7, 133. , 148.1, ..., 176.1, 177.4, 178.7],
[ 171.5, 126.5, 143.6, ..., 170.9, 171.2, 171.5],
...,
[ 166.8, 132.8, 152.2, ..., 166. , 166.3, 166.8],
[ 168.6, 139.4, 161.6, ..., 168.3, 168.4, 168.6],
[ 169.2, 138.1, 161.7, ..., 168.6, 168.9, 169.2]])
"""

def __init__(
