DEPR: Enforce deprecation of previous implementation of DataFrame.stack (pandas-dev#57302)

* DEPR: Enforce deprecation of previous implementation of DataFrame.stack
* fixup
* whatsnew
jordan-d-murphy · Feb 14, 2024 · 44c50b2
1 parent 99a30a6
commit 44c50b2
Show file tree

Hide file tree

Showing 24 changed files with 86 additions and 77 deletions.
diff --git a/doc/source/getting_started/comparison/comparison_with_r.rst b/doc/source/getting_started/comparison/comparison_with_r.rst
@@ -438,7 +438,7 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:
    )
 
    pd.melt(cheese, id_vars=["first", "last"])
-   cheese.set_index(["first", "last"]).stack(future_stack=True)  # alternative way
+   cheese.set_index(["first", "last"]).stack()  # alternative way
 
 For more details and examples see :ref:`the reshaping documentation
 <reshaping.melt>`.

diff --git a/doc/source/user_guide/10min.rst b/doc/source/user_guide/10min.rst
@@ -563,7 +563,7 @@ columns:
 
 .. ipython:: python
 
-   stacked = df2.stack(future_stack=True)
+   stacked = df2.stack()
    stacked
 
 With a "stacked" DataFrame or Series (having a :class:`MultiIndex` as the

diff --git a/doc/source/user_guide/cookbook.rst b/doc/source/user_guide/cookbook.rst
@@ -311,7 +311,7 @@ The :ref:`multindexing <advanced.hierarchical>` docs.
    df.columns = pd.MultiIndex.from_tuples([tuple(c.split("_")) for c in df.columns])
    df
    # Now stack & Reset
-   df = df.stack(0, future_stack=True).reset_index(1)
+   df = df.stack(0).reset_index(1)
    df
    # And fix the labels (Notice the label 'level_1' got added automatically)
    df.columns = ["Sample", "All_X", "All_Y"]
@@ -688,7 +688,7 @@ The :ref:`Pivot <reshaping.pivot>` docs.
        aggfunc="sum",
        margins=True,
    )
-   table.stack("City", future_stack=True)
+   table.stack("City")
 
 `Frequency table like plyr in R
 <https://stackoverflow.com/questions/15589354/frequency-tables-in-pandas-like-plyr-in-r>`__

diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst
@@ -1721,4 +1721,4 @@ column index name will be used as the name of the inserted column:
 
    result
 
-   result.stack(future_stack=True)
+   result.stack()
diff --git a/doc/source/user_guide/reshaping.rst b/doc/source/user_guide/reshaping.rst
@@ -163,7 +163,7 @@ as having a multi-level index:
 
 .. ipython:: python
 
-    table.stack(future_stack=True)
+    table.stack()
 
 .. _reshaping.stacking:
 
@@ -209,7 +209,7 @@ stacked level becomes the new lowest level in a :class:`MultiIndex` on the colum
 
 .. ipython:: python
 
-   stacked = df2.stack(future_stack=True)
+   stacked = df2.stack()
    stacked
 
 With a "stacked" :class:`DataFrame` or :class:`Series` (having a :class:`MultiIndex` as the
@@ -245,7 +245,7 @@ will result in a **sorted** copy of the original :class:`DataFrame` or :class:`S
    index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]])
    df = pd.DataFrame(np.random.randn(4), index=index, columns=["A"])
    df
-   all(df.unstack().stack(future_stack=True) == df.sort_index())
+   all(df.unstack().stack() == df.sort_index())
 
 .. _reshaping.stack_multiple:
 
@@ -270,16 +270,16 @@ processed individually.
     df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
     df
 
-    df.stack(level=["animal", "hair_length"], future_stack=True)
+    df.stack(level=["animal", "hair_length"])
 
 The list of levels can contain either level names or level numbers but
 not a mixture of the two.
 
 .. ipython:: python
 
-    # df.stack(level=['animal', 'hair_length'], future_stack=True)
+    # df.stack(level=['animal', 'hair_length'])
     # from above is equivalent to:
-    df.stack(level=[1, 2], future_stack=True)
+    df.stack(level=[1, 2])
 
 Missing data
 ~~~~~~~~~~~~

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -112,6 +112,7 @@ Removal of prior version deprecations/changes
 - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
 - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
 - Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
+- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
 - Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
 - Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
 - Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -9188,7 +9188,7 @@ def stack(
         level: IndexLabel = -1,
         dropna: bool | lib.NoDefault = lib.no_default,
         sort: bool | lib.NoDefault = lib.no_default,
-        future_stack: bool = False,
+        future_stack: bool = True,
     ):
         """
         Stack the prescribed level(s) from columns to index.
@@ -9261,7 +9261,7 @@ def stack(
              weight height
         cat       0      1
         dog       2      3
-        >>> df_single_level_cols.stack(future_stack=True)
+        >>> df_single_level_cols.stack()
         cat  weight    0
              height    1
         dog  weight    2
@@ -9284,7 +9284,7 @@ def stack(
                  kg    pounds
         cat       1        2
         dog       2        4
-        >>> df_multi_level_cols1.stack(future_stack=True)
+        >>> df_multi_level_cols1.stack()
                     weight
         cat kg           1
             pounds       2
@@ -9308,7 +9308,7 @@ def stack(
                 kg      m
         cat    1.0    2.0
         dog    3.0    4.0
-        >>> df_multi_level_cols2.stack(future_stack=True)
+        >>> df_multi_level_cols2.stack()
                 weight  height
         cat kg     1.0     NaN
             m      NaN     2.0
@@ -9319,13 +9319,13 @@ def stack(
 
         The first parameter controls which level or levels are stacked:
 
-        >>> df_multi_level_cols2.stack(0, future_stack=True)
+        >>> df_multi_level_cols2.stack(0)
                      kg    m
         cat weight  1.0  NaN
             height  NaN  2.0
         dog weight  3.0  NaN
             height  NaN  4.0
-        >>> df_multi_level_cols2.stack([0, 1], future_stack=True)
+        >>> df_multi_level_cols2.stack([0, 1])
         cat  weight  kg    1.0
              height  m     2.0
         dog  weight  kg    3.0
@@ -9338,19 +9338,14 @@ def stack(
                 stack_multiple,
             )
 
-            if (
-                dropna is not lib.no_default
-                or sort is not lib.no_default
-                or self.columns.nlevels > 1
-            ):
-                warnings.warn(
-                    "The previous implementation of stack is deprecated and will be "
-                    "removed in a future version of pandas. See the What's New notes "
-                    "for pandas 2.1.0 for details. Specify future_stack=True to adopt "
-                    "the new implementation and silence this warning.",
-                    FutureWarning,
-                    stacklevel=find_stack_level(),
-                )
+            warnings.warn(
+                "The previous implementation of stack is deprecated and will be "
+                "removed in a future version of pandas. See the What's New notes "
+                "for pandas 2.1.0 for details. Do not specify the future_stack "
+                "argument to adopt the new implementation and silence this warning.",
+                FutureWarning,
+                stacklevel=find_stack_level(),
+            )
 
             if dropna is lib.no_default:
                 dropna = True
@@ -9366,14 +9361,14 @@ def stack(
 
             if dropna is not lib.no_default:
                 raise ValueError(
-                    "dropna must be unspecified with future_stack=True as the new "
+                    "dropna must be unspecified as the new "
                     "implementation does not introduce rows of NA values. This "
                     "argument will be removed in a future version of pandas."
                 )
 
             if sort is not lib.no_default:
                 raise ValueError(
-                    "Cannot specify sort with future_stack=True, this argument will be "
+                    "Cannot specify sort, this argument will be "
                     "removed in a future version of pandas. Sort the result using "
                     ".sort_index instead."
                 )

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -513,7 +513,7 @@ def _wrap_applied_output(
             res_df = self.obj._constructor_expanddim(values, index=index)
             # if self.observed is False,
             # keep all-NaN rows created while re-indexing
-            res_ser = res_df.stack(future_stack=True)
+            res_ser = res_df.stack()
             res_ser.name = self.obj.name
             return res_ser
         elif isinstance(values[0], (Series, DataFrame)):

diff --git a/pandas/core/resample.py b/pandas/core/resample.py
@@ -1370,7 +1370,7 @@ def size(self):
         # If the result is a non-empty DataFrame we stack to get a Series
         # GH 46826
         if isinstance(result, ABCDataFrame) and not result.empty:
-            result = result.stack(future_stack=True)
+            result = result.stack()
 
         if not len(self.ax):
             from pandas import Series

diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py
@@ -420,7 +420,7 @@ def _all_key(key):
 
     if len(cols) > 0:
         row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc)
-        row_margin = row_margin.stack(future_stack=True)
+        row_margin = row_margin.stack()
 
         # GH#26568. Use names instead of indices in case of numeric names
         new_order_indices = [len(cols)] + list(range(len(cols)))

diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
@@ -518,7 +518,7 @@ def unstack(
         if isinstance(obj.index, MultiIndex):
             return _unstack_frame(obj, level, fill_value=fill_value, sort=sort)
         else:
-            return obj.T.stack(future_stack=True)
+            return obj.T.stack()
     elif not isinstance(obj.index, MultiIndex):
         # GH 36113
         # Give nicer error messages when unstack a Series whose

diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
@@ -127,7 +127,7 @@ def test_series_constructor_scalar_with_index(self, data, dtype):
     @pytest.mark.xfail(reason="Different definitions of NA")
     def test_stack(self):
         """
-        The test does .astype(object).stack(future_stack=True). If we happen to have
+        The test does .astype(object).stack(). If we happen to have
         any missing values in `data`, then we'll end up with different
         rows since we consider `{}` NA, but `.astype(object)` doesn't.
         """

diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py
@@ -106,7 +106,7 @@ def test_reset_index_with_intervals(self):
         tm.assert_frame_equal(result2, original)
 
     def test_reset_index(self, float_frame):
-        stacked = float_frame.stack(future_stack=True)[::2]
+        stacked = float_frame.stack()[::2]
         stacked = DataFrame({"foo": stacked, "bar": stacked})
 
         names = ["first", "second"]
@@ -739,7 +739,7 @@ def test_reset_index_rename(float_frame):
 
 def test_reset_index_rename_multiindex(float_frame):
     # GH 6878
-    stacked_df = float_frame.stack(future_stack=True)[::2]
+    stacked_df = float_frame.stack()[::2]
     stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df})
 
     names = ["first", "second"]
@@ -753,7 +753,7 @@ def test_reset_index_rename_multiindex(float_frame):
 
 def test_errorreset_index_rename(float_frame):
     # GH 6878
-    stacked_df = float_frame.stack(future_stack=True)[::2]
+    stacked_df = float_frame.stack()[::2]
     stacked_df = DataFrame({"first": stacked_df, "second": stacked_df})
 
     with pytest.raises(

diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py
@@ -28,6 +28,9 @@ def future_stack(request):
 
 
 class TestDataFrameReshape:
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     def test_stack_unstack(self, float_frame, future_stack):
         df = float_frame.copy()
         df[:] = np.arange(np.prod(df.shape)).reshape(df.shape)
@@ -1157,6 +1160,9 @@ def test_stack_full_multiIndex(self, future_stack):
         expected["B"] = expected["B"].astype(df.dtypes.iloc[0])
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     @pytest.mark.parametrize("ordered", [False, True])
     def test_stack_preserve_categorical_dtype(self, ordered, future_stack):
         # GH13854
@@ -1201,6 +1207,9 @@ def test_stack_multi_preserve_categorical_dtype(
 
         tm.assert_series_equal(result, expected)
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     def test_stack_preserve_categorical_dtype_values(self, future_stack):
         # GH-23077
         cat = pd.Categorical(["a", "a", "b", "c"])
@@ -1393,6 +1402,7 @@ def test_unstack_timezone_aware_values():
     tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated")
 def test_stack_timezone_aware_values(future_stack):
     # GH 19420
     ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York")
@@ -1719,6 +1729,9 @@ def test_stack(self, multiindex_year_month_day_dataframe_random_data, future_sta
         expected = ymd.unstack(0).stack(0, future_stack=future_stack)
         tm.assert_equal(result, expected)
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     @pytest.mark.parametrize(
         "idx, exp_idx",
         [
@@ -1805,6 +1818,9 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data, future_stack)
         assert result.name is None
         assert stacked["bar"].dtype == np.float64
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     def test_unstack_bug(self, future_stack):
         df = DataFrame(
             {
@@ -1839,6 +1855,9 @@ def test_stack_unstack_preserve_names(
         restacked = unstacked.stack(future_stack=future_stack)
         assert restacked.index.names == frame.index.names
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     @pytest.mark.parametrize("method", ["stack", "unstack"])
     def test_stack_unstack_wrong_level_name(
         self, method, multiindex_dataframe_random_data, future_stack
@@ -2308,6 +2327,9 @@ def test_unstack_preserve_types(
         )
         assert unstacked["F", 1].dtype == np.float64
 
+    @pytest.mark.filterwarnings(
+        "ignore:The previous implementation of stack is deprecated"
+    )
     def test_unstack_group_index_overflow(self, future_stack):
         codes = np.tile(np.arange(500), 2)
         level = np.arange(500)
@@ -2610,6 +2632,7 @@ def test_unstack_mixed_level_names(self):
         tm.assert_frame_equal(result, expected)
 
 
+@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated")
 def test_stack_tuple_columns(future_stack):
     # GH#54948 - test stack when the input has a non-MultiIndex with tuples
     df = DataFrame(
@@ -2643,7 +2666,7 @@ def test_stack_preserves_na(dtype, na_value, test_multiindex):
     else:
         index = Index([na_value], dtype=dtype)
     df = DataFrame({"a": [1]}, index=index)
-    result = df.stack(future_stack=True)
+    result = df.stack()
 
     if test_multiindex:
         expected_index = MultiIndex.from_arrays(