DOC: Fix PR01 errors in multiple files (pandas-dev#57438) (pandas-dev#57504)

* DOC: Fix PR01 errors in multiple files (pandas-dev#57438)

Fixed PR01 errors (pandas-dev#57438) and added parameter descriptions for:
- `pandas.Grouper`
- `pandas.core.groupby.DataFrameGroupBy.cummax`
- `pandas.core.groupby.DataFrameGroupBy.cummin`
- `pandas.core.groupby.DataFrameGroupBy.cumprod`
- `pandas.core.groupby.DataFrameGroupBy.cumsum`
- `pandas.core.groupby.DataFrameGroupBy.filter`
- `pandas.core.groupby.DataFrameGroupBy.pct_change`
- `pandas.core.groupby.DataFrameGroupBy.rolling`
- `pandas.core.groupby.SeriesGroupBy.cummax`
- `pandas.core.groupby.SeriesGroupBy.cummin`
- `pandas.core.groupby.SeriesGroupBy.cumprod`
- `pandas.core.groupby.SeriesGroupBy.cumsum`

The fixed functions were also removed from `ci/code_checks.sh`.

* Updated formatting

* Corrected E501 formatting

* Remove trailing whitespaces

* Fix PR02 error from docstring inheritance for resampler nunique method

Resolved a PR02 documentation error caused by `resample.Resampler.nunique` inheriting its docstring from `groupby.SeriesGroupBy.nunique`: the Resampler method has no `dropna` parameter, yet the inherited docstring documents one.

- Introduced `_nunique_extra_params` to insert parameter documentation dynamically, only where it applies (i.e. in `groupby.SeriesGroupBy.nunique`); see the illustrative sketch after this commit message.

* Resolve request

* Undo mistake

* Remove unnecessary import to fix ruff check
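For context, the `_nunique_extra_params` idea mentioned above follows the docstring-templating pattern pandas applies elsewhere (e.g. `pandas.util._decorators.doc`). The sketch below is purely illustrative: the names and toy implementations are invented here, and the final diff in this commit ultimately gives `Resampler.nunique` its own standalone docstring instead.

```python
# Illustrative only: substitute a parameter section into a shared docstring
# template only for the method that actually accepts the parameter, so the
# PR02 check ("documented parameter not in the signature") cannot fire.

_nunique_base_doc = """
Return number of unique elements in the group.
{extra_params}
Returns
-------
Series
    Number of unique values within each group.
"""

_nunique_extra_params = """
Parameters
----------
dropna : bool, default True
    Don't include NaN in the counts.
"""


def seriesgroupby_nunique(values, dropna: bool = True) -> int:
    # Toy stand-in for SeriesGroupBy.nunique: it accepts ``dropna``,
    # so the parameter block is substituted into its docstring below.
    vals = [v for v in values if v is not None] if dropna else list(values)
    return len(set(vals))


seriesgroupby_nunique.__doc__ = _nunique_base_doc.format(
    extra_params=_nunique_extra_params
)


def resampler_nunique(values) -> int:
    # Toy stand-in for Resampler.nunique: no ``dropna`` argument, so an empty
    # string is substituted and only existing parameters are documented.
    return len(set(values))


resampler_nunique.__doc__ = _nunique_base_doc.format(extra_params="")
```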
Deen-dot authored and pmhatre1 committed May 7, 2024
1 parent c2f9a24 commit b6a9005
Showing 5 changed files with 106 additions and 38 deletions.
15 changes: 0 additions & 15 deletions ci/code_checks.sh
@@ -1477,7 +1477,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.DatetimeIndex.std\
pandas.ExcelFile\
pandas.ExcelFile.parse\
pandas.Grouper\
pandas.HDFStore.append\
pandas.HDFStore.put\
pandas.Index.get_indexer_for\
@@ -1538,21 +1537,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
pandas.api.types.is_float\
pandas.api.types.is_hashable\
pandas.api.types.is_integer\
pandas.core.groupby.DataFrameGroupBy.cummax\
pandas.core.groupby.DataFrameGroupBy.cummin\
pandas.core.groupby.DataFrameGroupBy.cumprod\
pandas.core.groupby.DataFrameGroupBy.cumsum\
pandas.core.groupby.DataFrameGroupBy.filter\
pandas.core.groupby.DataFrameGroupBy.pct_change\
pandas.core.groupby.DataFrameGroupBy.rolling\
pandas.core.groupby.SeriesGroupBy.cummax\
pandas.core.groupby.SeriesGroupBy.cummin\
pandas.core.groupby.SeriesGroupBy.cumprod\
pandas.core.groupby.SeriesGroupBy.cumsum\
pandas.core.groupby.SeriesGroupBy.filter\
pandas.core.groupby.SeriesGroupBy.nunique\
pandas.core.groupby.SeriesGroupBy.pct_change\
pandas.core.groupby.SeriesGroupBy.rolling\
pandas.core.resample.Resampler.max\
pandas.core.resample.Resampler.min\
pandas.core.resample.Resampler.quantile\
34 changes: 13 additions & 21 deletions pandas/core/groupby/generic.py
@@ -723,15 +723,22 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
"""
Return number of unique elements in the group.
Parameters
----------
dropna : bool, default True
Don't include NaN in the counts.
Returns
-------
Series
Number of unique values within each group.
Examples
See Also
--------
For SeriesGroupby:
core.resample.Resampler.nunique : Method nunique for Resampler.
Examples
--------
>>> lst = ["a", "a", "b", "b"]
>>> ser = pd.Series([1, 2, 3, 3], index=lst)
>>> ser
@@ -744,25 +751,6 @@ def nunique(self, dropna: bool = True) -> Series | DataFrame:
a 2
b 1
dtype: int64
For Resampler:
>>> ser = pd.Series(
... [1, 2, 3, 3],
... index=pd.DatetimeIndex(
... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"]
... ),
... )
>>> ser
2023-01-01 1
2023-01-15 2
2023-02-01 3
2023-02-15 3
dtype: int64
>>> ser.resample("MS").nunique()
2023-01-01 2
2023-02-01 1
Freq: MS, dtype: int64
"""
ids, ngroups = self._grouper.group_info
val = self.obj._values
@@ -1942,6 +1930,10 @@ def filter(self, func, dropna: bool = True, *args, **kwargs) -> DataFrame:
dropna : bool
Drop groups that do not pass the filter. True by default; if False,
groups that evaluate False are filled with NaNs.
*args
Additional positional arguments to pass to `func`.
**kwargs
Additional keyword arguments to pass to `func`.
Returns
-------
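For reference (not part of the diff above), the `filter` behaviour and the forwarded `*args`/`**kwargs` documented here can be exercised like this; the sample data is arbitrary:

```python
import pandas as pd

df = pd.DataFrame(
    {
        "key": ["a", "a", "b", "b", "c"],
        "val": [1, 2, 3, 4, 10],
    }
)

# Keep only groups whose mean of "val" exceeds 2: group "a" (mean 1.5) is
# dropped, groups "b" and "c" are returned with their original index.
print(df.groupby("key").filter(lambda g: g["val"].mean() > 2))
#   key  val
# 2   b    3
# 3   b    4
# 4   c   10

# The *args/**kwargs documented above are forwarded to ``func``.
print(df.groupby("key").filter(lambda g, thresh: g["val"].mean() > thresh, thresh=2))
```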
58 changes: 58 additions & 0 deletions pandas/core/groupby/groupby.py
@@ -4672,6 +4672,14 @@ def cumprod(self, *args, **kwargs) -> NDFrameT:
"""
Cumulative product for each group.
Parameters
----------
*args : tuple
Positional arguments to be passed to `func`.
**kwargs : dict
Additional/specific keyword arguments to be passed to the function,
such as `numeric_only` and `skipna`.
Returns
-------
Series or DataFrame
@@ -4722,6 +4730,14 @@ def cumsum(self, *args, **kwargs) -> NDFrameT:
"""
Cumulative sum for each group.
Parameters
----------
*args : tuple
Positional arguments to be passed to `func`.
**kwargs : dict
Additional/specific keyword arguments to be passed to the function,
such as `numeric_only` and `skipna`.
Returns
-------
Series or DataFrame
@@ -4776,6 +4792,14 @@ def cummin(
"""
Cumulative min for each group.
Parameters
----------
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
**kwargs : dict, optional
Additional keyword arguments to be passed to the function, such as `skipna`,
to control whether NA/null values are ignored.
Returns
-------
Series or DataFrame
@@ -4838,6 +4862,14 @@ def cummax(
"""
Cumulative max for each group.
Parameters
----------
numeric_only : bool, default False
Include only `float`, `int` or `boolean` data.
**kwargs : dict, optional
Additional keyword arguments to be passed to the function, such as `skipna`,
to control whether NA/null values are ignored.
Returns
-------
Series or DataFrame
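As a quick illustration of the cumulative group aggregations whose parameters are documented in the hunks above (standard pandas API, not part of this diff; the data is arbitrary):

```python
import pandas as pd

ser = pd.Series([1, 2, 3, 4], index=["a", "a", "b", "b"])

# The cumulative sum restarts within each group.
print(ser.groupby(level=0).cumsum())
# a    1
# a    3
# b    3
# b    7
# dtype: int64

# numeric_only is accepted directly; skipna can be passed via **kwargs.
print(ser.groupby(level=0).cummax(numeric_only=False))
# a    1
# a    2
# b    3
# b    4
# dtype: int64
```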
@@ -5134,6 +5166,32 @@ def pct_change(
"""
Calculate pct_change of each value to previous entry in group.
Parameters
----------
periods : int, default 1
Periods to shift for calculating percentage change. Comparing with
a period of 1 means adjacent elements are compared, whereas a period
of 2 compares every other element.
fill_method : FillnaOptions or None, default None
Specifies how to handle missing values after the initial shift
operation necessary for percentage change calculation. Users are
encouraged to handle missing values manually in future versions.
Valid options are:
- A FillnaOptions value ('ffill', 'bfill') for forward or backward filling.
- None to avoid filling.
Note: Usage is discouraged due to impending deprecation.
limit : int or None, default None
The maximum number of consecutive NA values to fill, based on the chosen
`fill_method`. Address NaN values prior to using `pct_change` as this
parameter is nearing deprecation.
freq : str, pandas offset object, or None, default None
The frequency increment for time series data (e.g., 'M' for month-end).
If None, the frequency is inferred from the index. Relevant for time
series data only.
Returns
-------
Series or DataFrame
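And a short usage sketch for `pct_change` with the parameters documented above (again standard pandas usage, not taken from this commit):

```python
import pandas as pd

ser = pd.Series([1.0, 2.0, 4.0, 10.0, 15.0], index=["a", "a", "a", "b", "b"])

# Percentage change relative to the previous entry within each group;
# the first entry of every group has no predecessor and becomes NaN.
print(ser.groupby(level=0).pct_change())
# a    NaN
# a    1.0
# a    1.0
# b    NaN
# b    0.5
# dtype: float64

# periods=2 compares each entry with the one two positions earlier in the
# same group, so only the third "a" entry yields a value here.
print(ser.groupby(level=0).pct_change(periods=2))
# a    NaN
# a    NaN
# a    3.0
# b    NaN
# b    NaN
# dtype: float64
```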
4 changes: 4 additions & 0 deletions pandas/core/groupby/grouper.py
@@ -68,6 +68,10 @@ class Grouper:
Parameters
----------
*args
Currently unused, reserved for future use.
**kwargs
Dictionary of the keyword arguments to pass to Grouper.
key : str, defaults to None
Groupby key, which selects the grouping column of the target.
level : name/number, defaults to None
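For reference, `pandas.Grouper` as documented above is commonly combined with `groupby` to bucket a datetime key column by frequency (standard usage, arbitrary data; expected output shown as comments):

```python
import pandas as pd

df = pd.DataFrame(
    {
        "date": pd.to_datetime(
            ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"]
        ),
        "value": [1, 2, 3, 4],
    }
)

# Group rows into month-start buckets of the "date" column and sum "value".
print(df.groupby(pd.Grouper(key="date", freq="MS"))["value"].sum())
# date
# 2023-01-01    3
# 2023-02-01    7
# Freq: MS, Name: value, dtype: int64
```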
33 changes: 31 additions & 2 deletions pandas/core/resample.py
Expand Up @@ -59,7 +59,6 @@
NDFrame,
_shared_docs,
)
from pandas.core.groupby.generic import SeriesGroupBy
from pandas.core.groupby.groupby import (
BaseGroupBy,
GroupBy,
@@ -1358,8 +1357,38 @@ def ohlc(self):
return self._downsample("ohlc")

@final
@doc(SeriesGroupBy.nunique)
def nunique(self):
"""
Return number of unique elements in the group.
Returns
-------
Series
Number of unique values within each group.
See Also
--------
core.groupby.SeriesGroupBy.nunique : Method nunique for SeriesGroupBy.
Examples
--------
>>> ser = pd.Series(
... [1, 2, 3, 3],
... index=pd.DatetimeIndex(
... ["2023-01-01", "2023-01-15", "2023-02-01", "2023-02-15"]
... ),
... )
>>> ser
2023-01-01 1
2023-01-15 2
2023-02-01 3
2023-02-15 3
dtype: int64
>>> ser.resample("MS").nunique()
2023-01-01 2
2023-02-01 1
Freq: MS, dtype: int64
"""
return self._downsample("nunique")

@final
