pandas-dev · jreback · May 22, 2020 · Apr 1, 2020 · Apr 1, 2020 · Apr 1, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -254,6 +254,7 @@ Deprecations
 - :meth:`DataFrame.to_dict` has deprecated accepting short names for ``orient`` in future versions (:issue:`32515`)
 - :meth:`Categorical.to_dense` is deprecated and will be removed in a future version, use ``np.asarray(cat)`` instead (:issue:`32639`)
 - The ``fastpath`` keyword in the ``SingleBlockManager`` constructor is deprecated and will be removed in a future version (:issue:`33092`)
+- The ``squeeze`` keyword in :meth:`DataFrame.groupby` and :meth:`Series.groupby` is deprecated and will be removed in a future version (:issue:`32380`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -40,6 +40,7 @@
 from pandas._config import get_option
 
 from pandas._libs import algos as libalgos, lib, properties
+from pandas._libs.lib import no_default
 from pandas._typing import Axes, Axis, Dtype, FilePathOrBuffer, Label, Level, Renamer
 from pandas.compat import PY37
 from pandas.compat._optional import import_optional_dependency
@@ -5813,26 +5814,35 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        squeeze: bool = False,
+        squeeze: bool = no_default,
         observed: bool = False,
     ) -> "DataFrameGroupBy":
         from pandas.core.groupby.generic import DataFrameGroupBy
 
+        if squeeze is not no_default:
+            warnings.warn(
+                (
+                    "The `squeeze` parameter in pd.groupby is deprecated and "
+                    "will be removed in a future version."
+                ),
+                FutureWarning,
+                stacklevel=2,
+            )
+
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
-
-        return DataFrameGroupBy(
+        grouped = DataFrameGroupBy(
             obj=self,
             keys=by,
             axis=axis,
             level=level,
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,
             observed=observed,
         )
+        return grouped
 
     _shared_docs[
         "pivot"

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -7419,6 +7419,9 @@ def clip(
         squeeze : bool, default False
             Reduce the dimensionality of the return type if possible,
             otherwise return a consistent type.
+
+            .. deprecated:: 1.1.0
+
         observed : bool, default False
             This only applies if any of the groupers are Categoricals.
             If True: only show observed values for categorical groupers.

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -1253,34 +1253,7 @@ def first_not_none(values):
 
             if isinstance(v, (np.ndarray, Index, Series)):
                 if isinstance(v, Series):
-                    applied_index = self._selected_obj._get_axis(self.axis)
                     all_indexed_same = all_indexes_same([x.index for x in values])
-                    singular_series = len(values) == 1 and applied_index.nlevels == 1
-
-                    # GH3596
-                    # provide a reduction (Frame -> Series) if groups are
-                    # unique
-                    if self.squeeze:
-                        # assign the name to this series
-                        if singular_series:
-                            values[0].name = keys[0]
-
-                            # GH2893
-                            # we have series in the values array, we want to
-                            # produce a series:
-                            # if any of the sub-series are not indexed the same
-                            # OR we don't have a multi-index and we have only a
-                            # single values
-                            return self._concat_objects(
-                                keys, values, not_indexed_same=not_indexed_same
-                            )
-
-                        # still a series
-                        # path added as of GH 5545
-                        elif all_indexed_same:
-                            from pandas.core.reshape.concat import concat
-
-                            return concat(values)
 
                     if not all_indexed_same:
                         # GH 8467

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -369,7 +369,6 @@ def __init__(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        squeeze: bool = False,
         observed: bool = False,
         mutated: bool = False,
     ):
@@ -391,7 +390,6 @@ def __init__(
         self.keys = keys
         self.sort = sort
         self.group_keys = group_keys
-        self.squeeze = squeeze
         self.observed = observed
         self.mutated = mutated
 
@@ -2521,7 +2519,6 @@ def get_groupby(
     as_index: bool = True,
     sort: bool = True,
     group_keys: bool = True,
-    squeeze: bool = False,
     observed: bool = False,
     mutated: bool = False,
 ) -> GroupBy:
@@ -2549,7 +2546,6 @@ def get_groupby(
         as_index=as_index,
         sort=sort,
         group_keys=group_keys,
-        squeeze=squeeze,
         observed=observed,
         mutated=mutated,
     )
diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -76,7 +76,6 @@ def __init__(self, obj, groupby=None, axis=0, kind=None, **kwargs):
         self.sort = True
         self.axis = axis
         self.kind = kind
-        self.squeeze = False
         self.group_keys = True
         self.as_index = True
         self.exclusions = set()

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -22,6 +22,7 @@
 from pandas._config import get_option
 
 from pandas._libs import lib, properties, reshape, tslibs
+from pandas._libs.lib import no_default
 from pandas._typing import Axis, DtypeObj, Label
 from pandas.compat.numpy import function as nv
 from pandas.util._decorators import Appender, Substitution, doc
@@ -1646,11 +1647,21 @@ def groupby(
         as_index: bool = True,
         sort: bool = True,
         group_keys: bool = True,
-        squeeze: bool = False,
+        squeeze: bool = no_default,
         observed: bool = False,
     ) -> "SeriesGroupBy":
         from pandas.core.groupby.generic import SeriesGroupBy
 
+        if squeeze is not no_default:
+            warnings.warn(
+                (
+                    "The `squeeze` parameter in pd.groupby is deprecated and "
+                    "will be removed in a future version."
+                ),
+                FutureWarning,
+                stacklevel=2,
+            )
+
         if level is None and by is None:
             raise TypeError("You have to supply one of 'by' and 'level'")
         axis = self._get_axis_number(axis)
@@ -1663,7 +1674,6 @@ def groupby(
             as_index=as_index,
             sort=sort,
             group_keys=group_keys,
-            squeeze=squeeze,
             observed=observed,
         )
 

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -94,6 +94,11 @@ def max_value(group):
     tm.assert_series_equal(result, expected)
 
 
+@pytest.mark.xfail(
+    reason="GH#32380; the squeeze option will be "
+    "deprecated, so the DataFrame won't be converted "
+    "to a Series."
+)
 def test_groupby_return_type():
 
     # GH2893, return a reduced type
@@ -109,7 +114,8 @@ def test_groupby_return_type():
     def func(dataf):
         return dataf["val2"] - dataf["val2"].mean()
 
-    result = df1.groupby("val1", squeeze=True).apply(func)
+    with tm.assert_produces_warning(FutureWarning):
+        result = df1.groupby("val1", squeeze=True).apply(func)
     assert isinstance(result, Series)
 
     df2 = DataFrame(
@@ -124,12 +130,14 @@ def func(dataf):
     def func(dataf):
         return dataf["val2"] - dataf["val2"].mean()
 
-    result = df2.groupby("val1", squeeze=True).apply(func)
+    with tm.assert_produces_warning(FutureWarning):
+        result = df2.groupby("val1", squeeze=True).apply(func)
     assert isinstance(result, Series)
 
     # GH3596, return a consistent type (regression in 0.11 from 0.10.1)
     df = DataFrame([[1, 1], [1, 1]], columns=["X", "Y"])
-    result = df.groupby("X", squeeze=False).count()
+    with tm.assert_produces_warning(FutureWarning):
+        result = df.groupby("X", squeeze=False).count()
     assert isinstance(result, DataFrame)
 
 
@@ -2057,3 +2065,26 @@ def test_groups_repr_truncates(max_seq_items, expected):
 
         result = df.groupby(np.array(df.a)).groups.__repr__()
         assert result == expected
+
+
+@pytest.mark.parametrize("param", [True, False])
+def test_groupy_squeeze_is_deprecated(param):
+    # GH 32380: Deprecate the squeeze option in groupby
+    df = DataFrame(
+        [
+            {"val1": 1, "val2": 20},
+            {"val1": 1, "val2": 19},
+            {"val1": 1, "val2": 27},
+            {"val1": 1, "val2": 12},
+        ]
+    )
+
+    def func(dataf):
+        return dataf["val2"] - dataf["val2"].mean()
+
+    with tm.assert_produces_warning(FutureWarning):
+        result = df.groupby("val1", squeeze=param).apply(func)
+    assert isinstance(result, DataFrame)
+
+    with tm.assert_produces_warning(FutureWarning):
+        df["val1"].groupby(["a", "b", "a", "b"], squeeze=False)