Skip to content

Commit

Permalink
DEPR: Enforce deprecation of previous implementation of DataFrame.stack (pandas-dev#57302)
Browse files Browse the repository at this point in the history

* DEPR: Enforce deprecation of previous implementation of DataFrame.stack

* fixup

* whatsnew
  • Loading branch information
rhshadrach authored Feb 14, 2024
1 parent 99a30a6 commit 44c50b2
Show file tree
Hide file tree
Showing 24 changed files with 86 additions and 77 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,7 @@ In Python, the :meth:`~pandas.melt` method is the R equivalent:
)
pd.melt(cheese, id_vars=["first", "last"])
cheese.set_index(["first", "last"]).stack(future_stack=True) # alternative way
cheese.set_index(["first", "last"]).stack() # alternative way
For more details and examples see :ref:`the reshaping documentation
<reshaping.melt>`.
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/10min.rst
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ columns:

.. ipython:: python
stacked = df2.stack(future_stack=True)
stacked = df2.stack()
stacked
With a "stacked" DataFrame or Series (having a :class:`MultiIndex` as the
Expand Down
4 changes: 2 additions & 2 deletions doc/source/user_guide/cookbook.rst
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ The :ref:`multiindexing <advanced.hierarchical>` docs.
df.columns = pd.MultiIndex.from_tuples([tuple(c.split("_")) for c in df.columns])
df
# Now stack & Reset
df = df.stack(0, future_stack=True).reset_index(1)
df = df.stack(0).reset_index(1)
df
# And fix the labels (Notice the label 'level_1' got added automatically)
df.columns = ["Sample", "All_X", "All_Y"]
Expand Down Expand Up @@ -688,7 +688,7 @@ The :ref:`Pivot <reshaping.pivot>` docs.
aggfunc="sum",
margins=True,
)
table.stack("City", future_stack=True)
table.stack("City")
`Frequency table like plyr in R
<https://stackoverflow.com/questions/15589354/frequency-tables-in-pandas-like-plyr-in-r>`__
Expand Down
2 changes: 1 addition & 1 deletion doc/source/user_guide/groupby.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1721,4 +1721,4 @@ column index name will be used as the name of the inserted column:
result
result.stack(future_stack=True)
result.stack()
12 changes: 6 additions & 6 deletions doc/source/user_guide/reshaping.rst
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ as having a multi-level index:

.. ipython:: python
table.stack(future_stack=True)
table.stack()
.. _reshaping.stacking:

Expand Down Expand Up @@ -209,7 +209,7 @@ stacked level becomes the new lowest level in a :class:`MultiIndex` on the columns

.. ipython:: python
stacked = df2.stack(future_stack=True)
stacked = df2.stack()
stacked
With a "stacked" :class:`DataFrame` or :class:`Series` (having a :class:`MultiIndex` as the
Expand Down Expand Up @@ -245,7 +245,7 @@ will result in a **sorted** copy of the original :class:`DataFrame` or :class:`S
index = pd.MultiIndex.from_product([[2, 1], ["a", "b"]])
df = pd.DataFrame(np.random.randn(4), index=index, columns=["A"])
df
all(df.unstack().stack(future_stack=True) == df.sort_index())
all(df.unstack().stack() == df.sort_index())
.. _reshaping.stack_multiple:

Expand All @@ -270,16 +270,16 @@ processed individually.
df = pd.DataFrame(np.random.randn(4, 4), columns=columns)
df
df.stack(level=["animal", "hair_length"], future_stack=True)
df.stack(level=["animal", "hair_length"])
The list of levels can contain either level names or level numbers but
not a mixture of the two.

.. ipython:: python
# df.stack(level=['animal', 'hair_length'], future_stack=True)
# df.stack(level=['animal', 'hair_length'])
# from above is equivalent to:
df.stack(level=[1, 2], future_stack=True)
df.stack(level=[1, 2])
Missing data
~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ Removal of prior version deprecations/changes
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`)
- Removed ``DataFrame.bool`` and ``Series.bool`` (:issue:`51756`)
- Removed ``DataFrame.first`` and ``DataFrame.last`` (:issue:`53710`)
Expand Down
37 changes: 16 additions & 21 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -9188,7 +9188,7 @@ def stack(
level: IndexLabel = -1,
dropna: bool | lib.NoDefault = lib.no_default,
sort: bool | lib.NoDefault = lib.no_default,
future_stack: bool = False,
future_stack: bool = True,
):
"""
Stack the prescribed level(s) from columns to index.
Expand Down Expand Up @@ -9261,7 +9261,7 @@ def stack(
weight height
cat 0 1
dog 2 3
>>> df_single_level_cols.stack(future_stack=True)
>>> df_single_level_cols.stack()
cat weight 0
height 1
dog weight 2
Expand All @@ -9284,7 +9284,7 @@ def stack(
kg pounds
cat 1 2
dog 2 4
>>> df_multi_level_cols1.stack(future_stack=True)
>>> df_multi_level_cols1.stack()
weight
cat kg 1
pounds 2
Expand All @@ -9308,7 +9308,7 @@ def stack(
kg m
cat 1.0 2.0
dog 3.0 4.0
>>> df_multi_level_cols2.stack(future_stack=True)
>>> df_multi_level_cols2.stack()
weight height
cat kg 1.0 NaN
m NaN 2.0
Expand All @@ -9319,13 +9319,13 @@ def stack(
The first parameter controls which level or levels are stacked:
>>> df_multi_level_cols2.stack(0, future_stack=True)
>>> df_multi_level_cols2.stack(0)
kg m
cat weight 1.0 NaN
height NaN 2.0
dog weight 3.0 NaN
height NaN 4.0
>>> df_multi_level_cols2.stack([0, 1], future_stack=True)
>>> df_multi_level_cols2.stack([0, 1])
cat weight kg 1.0
height m 2.0
dog weight kg 3.0
Expand All @@ -9338,19 +9338,14 @@ def stack(
stack_multiple,
)

if (
dropna is not lib.no_default
or sort is not lib.no_default
or self.columns.nlevels > 1
):
warnings.warn(
"The previous implementation of stack is deprecated and will be "
"removed in a future version of pandas. See the What's New notes "
"for pandas 2.1.0 for details. Specify future_stack=True to adopt "
"the new implementation and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)
warnings.warn(
"The previous implementation of stack is deprecated and will be "
"removed in a future version of pandas. See the What's New notes "
"for pandas 2.1.0 for details. Do not specify the future_stack "
"argument to adopt the new implementation and silence this warning.",
FutureWarning,
stacklevel=find_stack_level(),
)

if dropna is lib.no_default:
dropna = True
Expand All @@ -9366,14 +9361,14 @@ def stack(

if dropna is not lib.no_default:
raise ValueError(
"dropna must be unspecified with future_stack=True as the new "
"dropna must be unspecified as the new "
"implementation does not introduce rows of NA values. This "
"argument will be removed in a future version of pandas."
)

if sort is not lib.no_default:
raise ValueError(
"Cannot specify sort with future_stack=True, this argument will be "
"Cannot specify sort, this argument will be "
"removed in a future version of pandas. Sort the result using "
".sort_index instead."
)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,7 @@ def _wrap_applied_output(
res_df = self.obj._constructor_expanddim(values, index=index)
# if self.observed is False,
# keep all-NaN rows created while re-indexing
res_ser = res_df.stack(future_stack=True)
res_ser = res_df.stack()
res_ser.name = self.obj.name
return res_ser
elif isinstance(values[0], (Series, DataFrame)):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,7 @@ def size(self):
# If the result is a non-empty DataFrame we stack to get a Series
# GH 46826
if isinstance(result, ABCDataFrame) and not result.empty:
result = result.stack(future_stack=True)
result = result.stack()

if not len(self.ax):
from pandas import Series
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/pivot.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def _all_key(key):

if len(cols) > 0:
row_margin = data[cols + values].groupby(cols, observed=observed).agg(aggfunc)
row_margin = row_margin.stack(future_stack=True)
row_margin = row_margin.stack()

# GH#26568. Use names instead of indices in case of numeric names
new_order_indices = [len(cols)] + list(range(len(cols)))
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,7 @@ def unstack(
if isinstance(obj.index, MultiIndex):
return _unstack_frame(obj, level, fill_value=fill_value, sort=sort)
else:
return obj.T.stack(future_stack=True)
return obj.T.stack()
elif not isinstance(obj.index, MultiIndex):
# GH 36113
# Give nicer error messages when unstack a Series whose
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def test_series_constructor_scalar_with_index(self, data, dtype):
@pytest.mark.xfail(reason="Different definitions of NA")
def test_stack(self):
"""
The test does .astype(object).stack(future_stack=True). If we happen to have
The test does .astype(object).stack(). If we happen to have
any missing values in `data`, then we'll end up with different
rows since we consider `{}` NA, but `.astype(object)` doesn't.
"""
Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/frame/methods/test_reset_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def test_reset_index_with_intervals(self):
tm.assert_frame_equal(result2, original)

def test_reset_index(self, float_frame):
stacked = float_frame.stack(future_stack=True)[::2]
stacked = float_frame.stack()[::2]
stacked = DataFrame({"foo": stacked, "bar": stacked})

names = ["first", "second"]
Expand Down Expand Up @@ -739,7 +739,7 @@ def test_reset_index_rename(float_frame):

def test_reset_index_rename_multiindex(float_frame):
# GH 6878
stacked_df = float_frame.stack(future_stack=True)[::2]
stacked_df = float_frame.stack()[::2]
stacked_df = DataFrame({"foo": stacked_df, "bar": stacked_df})

names = ["first", "second"]
Expand All @@ -753,7 +753,7 @@ def test_reset_index_rename_multiindex(float_frame):

def test_errorreset_index_rename(float_frame):
# GH 6878
stacked_df = float_frame.stack(future_stack=True)[::2]
stacked_df = float_frame.stack()[::2]
stacked_df = DataFrame({"first": stacked_df, "second": stacked_df})

with pytest.raises(
Expand Down
25 changes: 24 additions & 1 deletion pandas/tests/frame/test_stack_unstack.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def future_stack(request):


class TestDataFrameReshape:
@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
def test_stack_unstack(self, float_frame, future_stack):
df = float_frame.copy()
df[:] = np.arange(np.prod(df.shape)).reshape(df.shape)
Expand Down Expand Up @@ -1157,6 +1160,9 @@ def test_stack_full_multiIndex(self, future_stack):
expected["B"] = expected["B"].astype(df.dtypes.iloc[0])
tm.assert_frame_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
@pytest.mark.parametrize("ordered", [False, True])
def test_stack_preserve_categorical_dtype(self, ordered, future_stack):
# GH13854
Expand Down Expand Up @@ -1201,6 +1207,9 @@ def test_stack_multi_preserve_categorical_dtype(

tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
def test_stack_preserve_categorical_dtype_values(self, future_stack):
# GH-23077
cat = pd.Categorical(["a", "a", "b", "c"])
Expand Down Expand Up @@ -1393,6 +1402,7 @@ def test_unstack_timezone_aware_values():
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated")
def test_stack_timezone_aware_values(future_stack):
# GH 19420
ts = date_range(freq="D", start="20180101", end="20180103", tz="America/New_York")
Expand Down Expand Up @@ -1719,6 +1729,9 @@ def test_stack(self, multiindex_year_month_day_dataframe_random_data, future_sta
expected = ymd.unstack(0).stack(0, future_stack=future_stack)
tm.assert_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
@pytest.mark.parametrize(
"idx, exp_idx",
[
Expand Down Expand Up @@ -1805,6 +1818,9 @@ def test_stack_mixed_dtype(self, multiindex_dataframe_random_data, future_stack)
assert result.name is None
assert stacked["bar"].dtype == np.float64

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
def test_unstack_bug(self, future_stack):
df = DataFrame(
{
Expand Down Expand Up @@ -1839,6 +1855,9 @@ def test_stack_unstack_preserve_names(
restacked = unstacked.stack(future_stack=future_stack)
assert restacked.index.names == frame.index.names

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
@pytest.mark.parametrize("method", ["stack", "unstack"])
def test_stack_unstack_wrong_level_name(
self, method, multiindex_dataframe_random_data, future_stack
Expand Down Expand Up @@ -2308,6 +2327,9 @@ def test_unstack_preserve_types(
)
assert unstacked["F", 1].dtype == np.float64

@pytest.mark.filterwarnings(
"ignore:The previous implementation of stack is deprecated"
)
def test_unstack_group_index_overflow(self, future_stack):
codes = np.tile(np.arange(500), 2)
level = np.arange(500)
Expand Down Expand Up @@ -2610,6 +2632,7 @@ def test_unstack_mixed_level_names(self):
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:The previous implementation of stack is deprecated")
def test_stack_tuple_columns(future_stack):
# GH#54948 - test stack when the input has a non-MultiIndex with tuples
df = DataFrame(
Expand Down Expand Up @@ -2643,7 +2666,7 @@ def test_stack_preserves_na(dtype, na_value, test_multiindex):
else:
index = Index([na_value], dtype=dtype)
df = DataFrame({"a": [1]}, index=index)
result = df.stack(future_stack=True)
result = df.stack()

if test_multiindex:
expected_index = MultiIndex.from_arrays(
Expand Down
Loading

0 comments on commit 44c50b2

Please sign in to comment.