From e679789e5659675b3980fe8707c6fe3ba5c35105 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Oct 2023 15:04:08 +0200 Subject: [PATCH] Backport PR #54922: REGR: Restore _constructor_from_mgr to pass manager object to constructor --- doc/source/whatsnew/v2.1.2.rst | 1 + pandas/core/frame.py | 19 +++++------ pandas/core/series.py | 20 +++++------ pandas/tests/frame/test_arithmetic.py | 3 ++ pandas/tests/frame/test_subclass.py | 34 ++++++++++++++++++- pandas/tests/groupby/test_groupby_subclass.py | 4 +++ pandas/tests/reshape/concat/test_concat.py | 3 ++ pandas/tests/reshape/merge/test_merge.py | 3 ++ .../tests/reshape/merge/test_merge_ordered.py | 3 ++ pandas/tests/series/methods/test_to_frame.py | 5 +++ pandas/tests/series/test_subclass.py | 4 +++ 11 files changed, 77 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v2.1.2.rst b/doc/source/whatsnew/v2.1.2.rst index 1d2d89e7eadd1..a59cadde422b9 100644 --- a/doc/source/whatsnew/v2.1.2.rst +++ b/doc/source/whatsnew/v2.1.2.rst @@ -29,6 +29,7 @@ Fixed regressions - Fixed performance regression with wide DataFrames, typically involving methods where all columns were accessed individually (:issue:`55256`, :issue:`55245`) - Fixed regression in :func:`merge_asof` raising ``TypeError`` for ``by`` with datetime and timedelta dtypes (:issue:`55453`) - Fixed regression in :meth:`DataFrame.to_sql` not roundtripping datetime columns correctly for sqlite when using ``detect_types`` (:issue:`55554`) +- Fixed regression in construction of certain DataFrame or Series subclasses (:issue:`54922`) .. --------------------------------------------------------------------------- .. _whatsnew_212.bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1614ecf1d7037..c109070ce461d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -639,14 +639,12 @@ def _constructor(self) -> Callable[..., DataFrame]: return DataFrame def _constructor_from_mgr(self, mgr, axes): - df = self._from_mgr(mgr, axes=axes) - - if type(self) is DataFrame: - # fastpath avoiding constructor call - return df + if self._constructor is DataFrame: + # we are pandas.DataFrame (or a subclass that doesn't override _constructor) + return self._from_mgr(mgr, axes=axes) else: assert axes is mgr.axes - return self._constructor(df, copy=False) + return self._constructor(mgr) _constructor_sliced: Callable[..., Series] = Series @@ -654,13 +652,12 @@ def _sliced_from_mgr(self, mgr, axes) -> Series: return Series._from_mgr(mgr, axes) def _constructor_sliced_from_mgr(self, mgr, axes): - ser = self._sliced_from_mgr(mgr, axes=axes) - ser._name = None # caller is responsible for setting real name - if type(self) is DataFrame: - # fastpath avoiding constructor call + if self._constructor_sliced is Series: + ser = self._sliced_from_mgr(mgr, axes) + ser._name = None # caller is responsible for setting real name return ser assert axes is mgr.axes - return self._constructor_sliced(ser, copy=False) + return self._constructor_sliced(mgr) # ---------------------------------------------------------------------- # Constructors diff --git a/pandas/core/series.py b/pandas/core/series.py index 1d83643722ece..8ad049e173507 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -579,14 +579,14 @@ def _constructor(self) -> Callable[..., Series]: return Series def _constructor_from_mgr(self, mgr, axes): - ser = self._from_mgr(mgr, axes=axes) - ser._name = None # caller is responsible for setting real name - if type(self) is Series: - # fastpath avoiding constructor call + if self._constructor is Series: + # we are pandas.Series (or a subclass that doesn't override _constructor) + ser = self._from_mgr(mgr, axes=axes) + ser._name = None # caller is responsible for setting real name return ser else: assert axes is mgr.axes - return self._constructor(ser, copy=False) + return self._constructor(mgr) @property def _constructor_expanddim(self) -> Callable[..., DataFrame]: @@ -610,12 +610,12 @@ def _expanddim_from_mgr(self, mgr, axes) -> DataFrame: return DataFrame._from_mgr(mgr, axes=mgr.axes) def _constructor_expanddim_from_mgr(self, mgr, axes): - df = self._expanddim_from_mgr(mgr, axes) - if type(self) is Series: - # fastpath avoiding constructor - return df + from pandas.core.frame import DataFrame + + if self._constructor_expanddim is DataFrame: + return self._expanddim_from_mgr(mgr, axes) assert axes is mgr.axes - return self._constructor_expanddim(df, copy=False) + return self._constructor_expanddim(mgr) # types @property diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 670d4d4f554a6..e5a8feb7a89d3 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -2078,6 +2078,9 @@ def test_frame_sub_nullable_int(any_int_ea_dtype): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" +) def test_frame_op_subclass_nonclass_constructor(): # GH#43201 subclass._constructor is a function, not the subclass itself diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 37ca52eba6451..8e657f197ca1e 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -10,6 +10,10 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" +) + @pytest.fixture() def gpd_style_subclass_df(): @@ -734,8 +738,36 @@ def test_replace_list_method(self): # https://github.com/pandas-dev/pandas/pull/46018 df = tm.SubclassedDataFrame({"A": [0, 1, 2]}) msg = "The 'method' keyword in SubclassedDataFrame.replace is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): + with tm.assert_produces_warning( + FutureWarning, match=msg, raise_on_extra_warnings=False + ): result = df.replace([1, 2], method="ffill") expected = tm.SubclassedDataFrame({"A": [0, 0, 0]}) assert isinstance(result, tm.SubclassedDataFrame) tm.assert_frame_equal(result, expected) + + +class MySubclassWithMetadata(DataFrame): + _metadata = ["my_metadata"] + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + + my_metadata = kwargs.pop("my_metadata", None) + if args and isinstance(args[0], MySubclassWithMetadata): + my_metadata = args[0].my_metadata # type: ignore[has-type] + self.my_metadata = my_metadata + + @property + def _constructor(self): + return MySubclassWithMetadata + + +def test_constructor_with_metadata(): + # https://github.com/pandas-dev/pandas/pull/54922 + # https://github.com/pandas-dev/pandas/issues/55120 + df = MySubclassWithMetadata( + np.random.default_rng(2).random((5, 3)), columns=["A", "B", "C"] + ) + subset = df[["A", "B"]] + assert isinstance(subset, MySubclassWithMetadata) diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index 773c1e60e97af..678211ea4a053 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -11,6 +11,10 @@ import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" +) + @pytest.mark.parametrize( "obj", diff --git a/pandas/tests/reshape/concat/test_concat.py b/pandas/tests/reshape/concat/test_concat.py index 5dde863f246d1..7863aa55152f1 100644 --- a/pandas/tests/reshape/concat/test_concat.py +++ b/pandas/tests/reshape/concat/test_concat.py @@ -591,6 +591,9 @@ def test_duplicate_keys_same_frame(): tm.assert_frame_equal(result, expected) +@pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" +) @pytest.mark.parametrize( "obj", [ diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index 304ec37b1e453..6868f7344c153 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -689,6 +689,9 @@ def test_merge_nan_right2(self): )[["i1", "i2", "i1_", "i3"]] tm.assert_frame_equal(result, expected) + @pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" + ) def test_merge_type(self, df, df2): class NotADataFrame(DataFrame): @property diff --git a/pandas/tests/reshape/merge/test_merge_ordered.py b/pandas/tests/reshape/merge/test_merge_ordered.py index 1d0d4e3eb554b..cfb4e92fb45cd 100644 --- a/pandas/tests/reshape/merge/test_merge_ordered.py +++ b/pandas/tests/reshape/merge/test_merge_ordered.py @@ -70,6 +70,9 @@ def test_multigroup(self, left, right): result = merge_ordered(left, right, on="key", left_by="group") assert result["group"].notna().all() + @pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" + ) def test_merge_type(self, left, right): class NotADataFrame(DataFrame): @property diff --git a/pandas/tests/series/methods/test_to_frame.py b/pandas/tests/series/methods/test_to_frame.py index 01e547aa34b47..0eadf696b34cc 100644 --- a/pandas/tests/series/methods/test_to_frame.py +++ b/pandas/tests/series/methods/test_to_frame.py @@ -1,3 +1,5 @@ +import pytest + from pandas import ( DataFrame, Index, @@ -40,6 +42,9 @@ def test_to_frame(self, datetime_series): ) tm.assert_frame_equal(rs, xp) + @pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" + ) def test_to_frame_expanddim(self): # GH#9762 diff --git a/pandas/tests/series/test_subclass.py b/pandas/tests/series/test_subclass.py index a3550c6de6780..c2d5afcf884b1 100644 --- a/pandas/tests/series/test_subclass.py +++ b/pandas/tests/series/test_subclass.py @@ -4,6 +4,10 @@ import pandas as pd import pandas._testing as tm +pytestmark = pytest.mark.filterwarnings( + "ignore:Passing a BlockManager|Passing a SingleBlockManager:DeprecationWarning" +) + class TestSeriesSubclassing: @pytest.mark.parametrize(