Backport PR pandas-dev#39191: Revert "BUG/REG: RollingGroupby MultiIndex levels dropped (pandas-dev#38737)"
meeseeksmachine · Jan 16, 2021 · 34177bc · 34177bc
1 parent 6d599cb
commit 34177bc
Show file tree

Hide file tree

Showing 4 changed files with 23 additions and 55 deletions.
diff --git a/doc/source/whatsnew/v1.2.1.rst b/doc/source/whatsnew/v1.2.1.rst
@@ -15,7 +15,6 @@ including other versions of pandas.
 Fixed regressions
 ~~~~~~~~~~~~~~~~~
 - Fixed regression in :meth:`to_csv` that created corrupted zip files when there were more rows than ``chunksize`` (:issue:`38714`)
-- Fixed regression in ``groupby().rolling()`` where :class:`MultiIndex` levels were dropped (:issue:`38523`)
 - Fixed regression in repr of float-like strings of an ``object`` dtype having trailing 0's truncated after the decimal (:issue:`38708`)
 - Fixed regression in :meth:`DataFrame.groupby()` with :class:`Categorical` grouping column not showing unused categories for ``grouped.indices`` (:issue:`38642`)
 - Fixed regression in :meth:`DataFrame.any` and :meth:`DataFrame.all` not returning a result for tz-aware ``datetime64`` columns (:issue:`38723`)

diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py
@@ -108,7 +108,7 @@
     Note this does not influence the order of observations within each
     group. Groupby preserves the order of rows within each group.
 group_keys : bool, default True
-    When calling ``groupby().apply()``, add group keys to index to identify pieces.
+    When calling apply, add group keys to index to identify pieces.
 squeeze : bool, default False
     Reduce the dimensionality of the return type if possible,
     otherwise return a consistent type.

diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py
@@ -767,22 +767,28 @@ def _apply(
             numba_cache_key,
             **kwargs,
         )
-        # Reconstruct the resulting MultiIndex
+        # Reconstruct the resulting MultiIndex from tuples
         # 1st set of levels = group by labels
-        # 2nd set of levels = original DataFrame/Series index
-        grouped_object_index = self.obj.index
-        grouped_index_name = [*grouped_object_index.names]
-        groupby_keys = [grouping.name for grouping in self._groupby.grouper._groupings]
-        result_index_names = groupby_keys + grouped_index_name
+        # 2nd set of levels = original index
+        # Ignore 2nd set of levels if a group by label include an index level
+        result_index_names = [
+            grouping.name for grouping in self._groupby.grouper._groupings
+        ]
+        grouped_object_index = None
 
-        drop_columns = [
+        column_keys = [
             key
-            for key in groupby_keys
+            for key in result_index_names
             if key not in self.obj.index.names or key is None
         ]
-        if len(drop_columns) != len(groupby_keys):
-            # Our result will have kept groupby columns which should be dropped
-            result = result.drop(columns=drop_columns, errors="ignore")
+
+        if len(column_keys) == len(result_index_names):
+            grouped_object_index = self.obj.index
+            grouped_index_name = [*grouped_object_index.names]
+            result_index_names += grouped_index_name
+        else:
+            # Our result will have still kept the column in the result
+            result = result.drop(columns=column_keys, errors="ignore")
 
         codes = self._groupby.grouper.codes
         levels = self._groupby.grouper.levels

diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py
@@ -556,31 +556,23 @@ def test_groupby_rolling_nans_in_index(self, rollings, key):
         with pytest.raises(ValueError, match=f"{key} must be monotonic"):
             df.groupby("c").rolling("60min", **rollings)
 
-    @pytest.mark.parametrize("group_keys", [True, False])
-    def test_groupby_rolling_group_keys(self, group_keys):
+    def test_groupby_rolling_group_keys(self):
         # GH 37641
-        # GH 38523: GH 37641 actually was not a bug.
-        # group_keys only applies to groupby.apply directly
         arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
         index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
 
         s = Series([1, 2, 3], index=index)
-        result = s.groupby(["idx1", "idx2"], group_keys=group_keys).rolling(1).mean()
+        result = s.groupby(["idx1", "idx2"], group_keys=False).rolling(1).mean()
         expected = Series(
             [1.0, 2.0, 3.0],
             index=MultiIndex.from_tuples(
-                [
-                    ("val1", "val1", "val1", "val1"),
-                    ("val1", "val1", "val1", "val1"),
-                    ("val2", "val2", "val2", "val2"),
-                ],
-                names=["idx1", "idx2", "idx1", "idx2"],
+                [("val1", "val1"), ("val1", "val1"), ("val2", "val2")],
+                names=["idx1", "idx2"],
             ),
         )
         tm.assert_series_equal(result, expected)
 
     def test_groupby_rolling_index_level_and_column_label(self):
-        # The groupby keys should not appear as a resulting column
         arrays = [["val1", "val1", "val2"], ["val1", "val1", "val2"]]
         index = MultiIndex.from_arrays(arrays, names=("idx1", "idx2"))
 
@@ -589,12 +581,7 @@ def test_groupby_rolling_index_level_and_column_label(self):
         expected = DataFrame(
             {"B": [0.0, 1.0, 2.0]},
             index=MultiIndex.from_tuples(
-                [
-                    ("val1", 1, "val1", "val1"),
-                    ("val1", 1, "val1", "val1"),
-                    ("val2", 2, "val2", "val2"),
-                ],
-                names=["idx1", "A", "idx1", "idx2"],
+                [("val1", 1), ("val1", 1), ("val2", 2)], names=["idx1", "A"]
             ),
         )
         tm.assert_frame_equal(result, expected)
@@ -653,30 +640,6 @@ def test_groupby_rolling_resulting_multiindex(self):
         )
         tm.assert_index_equal(result.index, expected_index)
 
-    def test_groupby_level(self):
-        # GH 38523
-        arrays = [
-            ["Falcon", "Falcon", "Parrot", "Parrot"],
-            ["Captive", "Wild", "Captive", "Wild"],
-        ]
-        index = MultiIndex.from_arrays(arrays, names=("Animal", "Type"))
-        df = DataFrame({"Max Speed": [390.0, 350.0, 30.0, 20.0]}, index=index)
-        result = df.groupby(level=0)["Max Speed"].rolling(2).sum()
-        expected = Series(
-            [np.nan, 740.0, np.nan, 50.0],
-            index=MultiIndex.from_tuples(
-                [
-                    ("Falcon", "Falcon", "Captive"),
-                    ("Falcon", "Falcon", "Wild"),
-                    ("Parrot", "Parrot", "Captive"),
-                    ("Parrot", "Parrot", "Wild"),
-                ],
-                names=["Animal", "Animal", "Type"],
-            ),
-            name="Max Speed",
-        )
-        tm.assert_series_equal(result, expected)
-
 
 class TestExpanding:
     def setup_method(self):