From 4cf8d55b1549be756857d82480afec7072028a73 Mon Sep 17 00:00:00 2001 From: SELEE Date: Sun, 24 Apr 2022 07:27:06 +0900 Subject: [PATCH] ENH: Add numeric_only to resampler methods (#46792) --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/resample.py | 16 ++++- pandas/tests/resample/test_resample_api.py | 84 ++++++++++++++++++++++ 3 files changed, 99 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 922ef28b855b9..e4879a6c41515 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -96,6 +96,7 @@ Other enhancements - :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`) - Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, and :meth:`DataFrame.cov` (:issue:`46560`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) +- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 65adc68e39548..362f61d25ac34 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1027,9 +1027,21 @@ def quantile(self, q=0.5, **kwargs): # downsample methods for method in ["sum", "prod", "min", "max", "first", "last"]: - def f(self, _method=method, min_count=0, *args, **kwargs): + def f( + self, + _method: str = method, + numeric_only: bool | lib.NoDefault = lib.no_default, + min_count: int = 0, + *args, + **kwargs, + ): + if numeric_only is lib.no_default: + if _method != "sum": + # For DataFrameGroupBy, set it to be False for methods other than `sum`. 
+ numeric_only = False + nv.validate_resampler_func(_method, args, kwargs) - return self._downsample(_method, min_count=min_count) + return self._downsample(_method, numeric_only=numeric_only, min_count=min_count) f.__doc__ = getattr(GroupBy, method).__doc__ setattr(Resampler, method, f) diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 9148600d31bc2..a5834dd237c01 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._libs import lib + import pandas as pd from pandas import ( DataFrame, @@ -771,3 +773,85 @@ def test_end_and_end_day_origin( ) tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize( + "method, numeric_only, expected_data", + [ + ("sum", True, {"num": [25]}), + ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}), + ("sum", lib.no_default, {"num": [25]}), + ("prod", True, {"num": [100]}), + ("prod", False, {"num": [100]}), + ("prod", lib.no_default, {"num": [100]}), + ("min", True, {"num": [5]}), + ("min", False, {"cat": ["cat_1"], "num": [5]}), + ("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}), + ("max", True, {"num": [20]}), + ("max", False, {"cat": ["cat_2"], "num": [20]}), + ("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}), + ("first", True, {"num": [5]}), + ("first", False, {"cat": ["cat_1"], "num": [5]}), + ("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}), + ("last", True, {"num": [20]}), + ("last", False, {"cat": ["cat_2"], "num": [20]}), + ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}), + ], +) +def test_frame_downsample_method(method, numeric_only, expected_data): + # GH#46442 test if `numeric_only` behaves as expected for DataFrameGroupBy + + index = date_range("2018-01-01", periods=2, freq="D") + expected_index = date_range("2018-12-31", periods=1, freq="Y") + df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, 
+ index=index) + resampled = df.resample("Y") + + func = getattr(resampled, method) + result = func(numeric_only=numeric_only) + + expected = DataFrame(expected_data, index=expected_index) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "method, numeric_only, expected_data", + [ + ("sum", True, ()), + ("sum", False, ["cat_1cat_2"]), + ("sum", lib.no_default, ["cat_1cat_2"]), + ("prod", True, ()), + ("prod", False, ()), + ("prod", lib.no_default, ()), + ("min", True, ()), + ("min", False, ["cat_1"]), + ("min", lib.no_default, ["cat_1"]), + ("max", True, ()), + ("max", False, ["cat_2"]), + ("max", lib.no_default, ["cat_2"]), + ("first", True, ()), + ("first", False, ["cat_1"]), + ("first", lib.no_default, ["cat_1"]), + ("last", True, ()), + ("last", False, ["cat_2"]), + ("last", lib.no_default, ["cat_2"]), + ], +) +def test_series_downsample_method(method, numeric_only, expected_data): + # GH#46442 test if `numeric_only` behaves as expected for SeriesGroupBy + + index = date_range("2018-01-01", periods=2, freq="D") + expected_index = date_range("2018-12-31", periods=1, freq="Y") + df = Series(["cat_1", "cat_2"], index=index) + resampled = df.resample("Y") + + func = getattr(resampled, method) + if numeric_only and numeric_only is not lib.no_default: + with pytest.raises(NotImplementedError, match="not implement numeric_only"): + func(numeric_only=numeric_only) + elif method == "prod": + with pytest.raises(TypeError, match="can't multiply sequence by non-int"): + func(numeric_only=numeric_only) + else: + result = func(numeric_only=numeric_only) + expected = Series(expected_data, index=expected_index) + tm.assert_series_equal(result, expected)