Skip to content

Commit

Permalink
ENH: Add numeric_only to resampler methods (#46792)
Browse files Browse the repository at this point in the history
  • Loading branch information
lorentzbao authored Apr 23, 2022
1 parent 4caa297 commit 4cf8d55
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 2 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ Other enhancements
- :meth:`pd.concat` now raises when ``levels`` contains duplicate values (:issue:`46653`)
- Added ``numeric_only`` argument to :meth:`DataFrame.corr`, :meth:`DataFrame.corrwith`, and :meth:`DataFrame.cov` (:issue:`46560`)
- A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`)
- Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`)

.. ---------------------------------------------------------------------------
.. _whatsnew_150.notable_bug_fixes:
Expand Down
16 changes: 14 additions & 2 deletions pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1027,9 +1027,21 @@ def quantile(self, q=0.5, **kwargs):
# downsample methods
for method in ["sum", "prod", "min", "max", "first", "last"]:

    # `_method=method` binds the current loop value at definition time; a plain
    # closure over `method` would see only the last name in the list.
    def f(
        self,
        _method: str = method,
        numeric_only: bool | lib.NoDefault = lib.no_default,
        min_count: int = 0,
        *args,
        **kwargs,
    ):
        # When the caller did not pass `numeric_only`, every method except
        # `sum` falls back to False.  For `sum` the `lib.no_default` sentinel
        # is forwarded unchanged to `_downsample`.
        # NOTE(review): presumably the downstream groupby `sum` resolves the
        # sentinel to its own default -- confirm against `_downsample`.
        if numeric_only is lib.no_default and _method != "sum":
            numeric_only = False

        # Validate any extra positional/keyword arguments for this method
        # before dispatching.
        nv.validate_resampler_func(_method, args, kwargs)
        return self._downsample(
            _method, numeric_only=numeric_only, min_count=min_count
        )

    # Reuse the docstring of the same-named GroupBy method, then attach the
    # generated function to Resampler under that name.
    f.__doc__ = getattr(GroupBy, method).__doc__
    setattr(Resampler, method, f)
Expand Down
84 changes: 84 additions & 0 deletions pandas/tests/resample/test_resample_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
import pytest

from pandas._libs import lib

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -771,3 +773,85 @@ def test_end_and_end_day_origin(
)

tm.assert_series_equal(res, expected)


@pytest.mark.parametrize(
    "method, numeric_only, expected_data",
    [
        ("sum", True, {"num": [25]}),
        ("sum", False, {"cat": ["cat_1cat_2"], "num": [25]}),
        ("sum", lib.no_default, {"num": [25]}),
        ("prod", True, {"num": [100]}),
        ("prod", False, {"num": [100]}),
        ("prod", lib.no_default, {"num": [100]}),
        ("min", True, {"num": [5]}),
        ("min", False, {"cat": ["cat_1"], "num": [5]}),
        ("min", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
        ("max", True, {"num": [20]}),
        ("max", False, {"cat": ["cat_2"], "num": [20]}),
        ("max", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
        ("first", True, {"num": [5]}),
        ("first", False, {"cat": ["cat_1"], "num": [5]}),
        ("first", lib.no_default, {"cat": ["cat_1"], "num": [5]}),
        ("last", True, {"num": [20]}),
        ("last", False, {"cat": ["cat_2"], "num": [20]}),
        ("last", lib.no_default, {"cat": ["cat_2"], "num": [20]}),
    ],
)
def test_frame_downsample_method(method, numeric_only, expected_data):
    """Check ``numeric_only`` handling of downsample methods on a DataFrame.

    A two-row daily frame with one string column (``cat``) and one integer
    column (``num``) is resampled to a single yearly bin.  Each case calls
    ``method`` with the given ``numeric_only`` value (``lib.no_default`` stands
    for "argument not supplied") and compares the result with a frame built
    from ``expected_data``.
    """
    # GH#46442 test that `numeric_only` behaves as expected when resampling
    # a DataFrame

    index = date_range("2018-01-01", periods=2, freq="D")
    # Both input rows fall in 2018, so the yearly bin is labelled 2018-12-31.
    expected_index = date_range("2018-12-31", periods=1, freq="Y")
    df = DataFrame({"cat": ["cat_1", "cat_2"], "num": [5, 20]}, index=index)
    resampled = df.resample("Y")

    func = getattr(resampled, method)
    result = func(numeric_only=numeric_only)

    expected = DataFrame(expected_data, index=expected_index)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "method, numeric_only, expected_data",
    [
        ("sum", True, ()),
        ("sum", False, ["cat_1cat_2"]),
        ("sum", lib.no_default, ["cat_1cat_2"]),
        ("prod", True, ()),
        ("prod", False, ()),
        ("prod", lib.no_default, ()),
        ("min", True, ()),
        ("min", False, ["cat_1"]),
        ("min", lib.no_default, ["cat_1"]),
        ("max", True, ()),
        ("max", False, ["cat_2"]),
        ("max", lib.no_default, ["cat_2"]),
        ("first", True, ()),
        ("first", False, ["cat_1"]),
        ("first", lib.no_default, ["cat_1"]),
        ("last", True, ()),
        ("last", False, ["cat_2"]),
        ("last", lib.no_default, ["cat_2"]),
    ],
)
def test_series_downsample_method(method, numeric_only, expected_data):
    """Check ``numeric_only`` handling of downsample methods on a Series.

    A two-element daily string Series is resampled to a single yearly bin.
    ``numeric_only=True`` is expected to raise ``NotImplementedError``; ``prod``
    on strings is expected to raise ``TypeError``; every other case is compared
    with a Series built from ``expected_data``.  ``expected_data`` is an unused
    placeholder ``()`` for the cases that raise.
    """
    # GH#46442 test that `numeric_only` behaves as expected when resampling
    # a Series

    index = date_range("2018-01-01", periods=2, freq="D")
    # Both input rows fall in 2018, so the yearly bin is labelled 2018-12-31.
    expected_index = date_range("2018-12-31", periods=1, freq="Y")
    ser = Series(["cat_1", "cat_2"], index=index)
    resampled = ser.resample("Y")

    func = getattr(resampled, method)
    # `lib.no_default` is a truthy sentinel, so test identity with True rather
    # than truthiness to single out the explicit `numeric_only=True` cases.
    if numeric_only is True:
        with pytest.raises(NotImplementedError, match="not implement numeric_only"):
            func(numeric_only=numeric_only)
    elif method == "prod":
        with pytest.raises(TypeError, match="can't multiply sequence by non-int"):
            func(numeric_only=numeric_only)
    else:
        result = func(numeric_only=numeric_only)
        expected = Series(expected_data, index=expected_index)
        tm.assert_series_equal(result, expected)

0 comments on commit 4cf8d55

Please sign in to comment.