Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

32380 deprecate squeeze in groupby #33218

Merged
merged 12 commits into from
May 22, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,7 @@ Deprecations
- :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)
- :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
- The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
- The ``squeeze`` keyword in :meth:`DataFrame.groupby` and :meth:`Series.groupby` is deprecated and will be removed in a future version (:issue:`32380`)

.. ---------------------------------------------------------------------------

Expand Down
18 changes: 14 additions & 4 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
from pandas._config import get_option

from pandas._libs import algos as libalgos, lib, properties
from pandas._libs.lib import no_default
from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
from pandas.compat import PY37
from pandas.compat._optional import import_optional_dependency
Expand Down Expand Up @@ -5813,26 +5814,35 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
squeeze: bool = no_default,
observed: bool = False,
) -> "DataFrameGroupBy":
from pandas.core.groupby.generic import DataFrameGroupBy

if squeeze is not no_default:
warnings.warn(
(
"The `squeeze` parameter in pd.groupby is deprecated and "
phofl marked this conversation as resolved.
Show resolved Hide resolved
"will be removed in a future version."
),
FutureWarning,
stacklevel=2,
)

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)

return DataFrameGroupBy(
phofl marked this conversation as resolved.
Show resolved Hide resolved
grouped = DataFrameGroupBy(
obj=self,
keys=by,
axis=axis,
level=level,
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
phofl marked this conversation as resolved.
Show resolved Hide resolved
observed=observed,
)
return grouped

_shared_docs[
"pivot"
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -7419,6 +7419,8 @@ def clip(
squeeze : bool, default False
Reduce the dimensionality of the return type if possible,
otherwise return a consistent type.

.. deprecated:: 1.1.0
phofl marked this conversation as resolved.
Show resolved Hide resolved

observed : bool, default False
This only applies if any of the groupers are Categoricals.
If True: only show observed values for categorical groupers.
Expand Down
27 changes: 0 additions & 27 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1253,34 +1253,7 @@ def first_not_none(values):

if isinstance(v, (np.ndarray, Index, Series)):
if isinstance(v, Series):
applied_index = self._selected_obj._get_axis(self.axis)
phofl marked this conversation as resolved.
Show resolved Hide resolved
all_indexed_same = all_indexes_same([x.index for x in values])
singular_series = len(values) == 1 and applied_index.nlevels == 1

# GH3596
# provide a reduction (Frame -> Series) if groups are
# unique
if self.squeeze:
# assign the name to this series
if singular_series:
values[0].name = keys[0]

# GH2893
# we have series in the values array, we want to
# produce a series:
# if any of the sub-series are not indexed the same
# OR we don't have a multi-index and we have only a
# single values
return self._concat_objects(
keys, values, not_indexed_same=not_indexed_same
)

# still a series
# path added as of GH 5545
elif all_indexed_same:
from pandas.core.reshape.concat import concat

return concat(values)

if not all_indexed_same:
# GH 8467
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,6 @@ def __init__(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
observed: bool = False,
mutated: bool = False,
):
Expand All @@ -391,7 +390,6 @@ def __init__(
self.keys = keys
self.sort = sort
self.group_keys = group_keys
self.squeeze = squeeze
self.observed = observed
self.mutated = mutated

Expand Down Expand Up @@ -2521,7 +2519,6 @@ def get_groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
observed: bool = False,
mutated: bool = False,
) -> GroupBy:
Expand Down Expand Up @@ -2549,7 +2546,6 @@ def get_groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
mutated=mutated,
)
1 change: 0 additions & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,6 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
self.sort = True
self.axis = axis
self.kind = kind
self.squeeze = False
self.group_keys = True
self.as_index = True
self.exclusions = set()
Expand Down
14 changes: 12 additions & 2 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from pandas._config import get_option

from pandas._libs import lib, properties, reshape, tslibs
from pandas._libs.lib import no_default
from pandas._typing import Axis, DtypeObj, Label
from pandas.compat.numpy import function as nv
from pandas.util._decorators import Appender, Substitution, doc
Expand Down Expand Up @@ -1646,11 +1647,21 @@ def groupby(
as_index: bool = True,
sort: bool = True,
group_keys: bool = True,
squeeze: bool = False,
squeeze: bool = no_default,
observed: bool = False,
) -> "SeriesGroupBy":
from pandas.core.groupby.generic import SeriesGroupBy

if squeeze is not no_default:
warnings.warn(
(
"The `squeeze` parameter in pd.groupby is deprecated and "
phofl marked this conversation as resolved.
Show resolved Hide resolved
"will be removed in a future version."
),
FutureWarning,
stacklevel=2,
)

if level is None and by is None:
raise TypeError("You have to supply one of 'by' and 'level'")
axis = self._get_axis_number(axis)
Expand All @@ -1663,7 +1674,6 @@ def groupby(
as_index=as_index,
sort=sort,
group_keys=group_keys,
squeeze=squeeze,
observed=observed,
)

Expand Down
37 changes: 34 additions & 3 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,11 @@ def max_value(group):
tm.assert_series_equal(result, expected)


@pytest.mark.xfail(
reason="GH#32380; the squeeze option will be "
"deprecated, so the DataFrame won't be converted "
"to a Series."
)
def test_groupby_return_type():

# GH2893, return a reduced type
Expand All @@ -109,7 +114,8 @@ def test_groupby_return_type():
def func(dataf):
return dataf["val2"] - dataf["val2"].mean()

result = df1.groupby("val1", squeeze=True).apply(func)
with tm.assert_produces_warning(FutureWarning):
result = df1.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)

df2 = DataFrame(
Expand All @@ -124,12 +130,14 @@ def func(dataf):
def func(dataf):
return dataf["val2"] - dataf["val2"].mean()

result = df2.groupby("val1", squeeze=True).apply(func)
with tm.assert_produces_warning(FutureWarning):
result = df2.groupby("val1", squeeze=True).apply(func)
assert isinstance(result, Series)

# GH3596, return a consistent type (regression in 0.11 from 0.10.1)
df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"])
result = df.groupby("X", squeeze=False).count()
with tm.assert_produces_warning(FutureWarning):
result = df.groupby("X", squeeze=False).count()
assert isinstance(result, DataFrame)


Expand Down Expand Up @@ -2057,3 +2065,26 @@ def test_groups_repr_truncates(max_seq_items, expected):

result = df.groupby(np.array(df.a)).groups.__repr__()
assert result == expected


@pytest.mark.parametrize("param", [True, False])
def test_groupy_squeeze_is_deprecated(param):
    """Check that passing ``squeeze`` to ``groupby`` warns (GH 32380).

    Both ``squeeze=True`` and ``squeeze=False`` are expected to emit a
    ``FutureWarning``, and the applied result must remain a ``DataFrame``
    rather than being reduced to a ``Series``.  The same warning is
    expected when ``squeeze`` is passed to ``groupby`` on a single column.
    """
    # A single group ("val1" is constant) is the case the old squeeze
    # option special-cased.
    records = [
        {"val1": 1, "val2": 20},
        {"val1": 1, "val2": 19},
        {"val1": 1, "val2": 27},
        {"val1": 1, "val2": 12},
    ]
    frame = DataFrame(records)

    def demean(group):
        # Centre "val2" around its per-group mean.
        centre = group["val2"].mean()
        return group["val2"] - centre

    # DataFrame.groupby: a warning is expected for either value of squeeze.
    with tm.assert_produces_warning(FutureWarning):
        grouped = frame.groupby("val1", squeeze=param)
        result = grouped.apply(demean)
    assert isinstance(result, DataFrame)

    # Groupby on a single column: passing the keyword explicitly must warn
    # too, even with squeeze=False.
    with tm.assert_produces_warning(FutureWarning):
        frame["val1"].groupby(["a", "b", "a", "b"], squeeze=False)