Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: ignoring missing labels when indexing on MultiIndex level #42351

Merged
merged 3 commits into from
Jul 14, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ Deprecations
- Deprecated ``method`` argument in :meth:`Index.get_loc`, use ``index.get_indexer([label], method=...)`` instead (:issue:`42269`)
- Deprecated treating integer keys in :meth:`Series.__setitem__` as positional when the index is a :class:`Float64Index` not containing the key, a :class:`IntervalIndex` with no entries containing the key, or a :class:`MultiIndex` with leading :class:`Float64Index` level not containing the key (:issue:`33469`)
- Deprecated treating ``numpy.datetime64`` objects as UTC times when passed to the :class:`Timestamp` constructor along with a timezone. In a future version, these will be treated as wall-times. To retain the old behavior, use ``Timestamp(dt64).tz_localize("UTC").tz_convert(tz)`` (:issue:`24559`)
-
- Deprecated ignoring missing labels when indexing with a sequence of labels on a level of a MultiIndex (:issue:`42351`)

.. ---------------------------------------------------------------------------

Expand Down
12 changes: 12 additions & 0 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -3268,6 +3268,18 @@ def _update_indexer(idxr: Index, indexer: Index) -> Index:
)
except KeyError:
# ignore not founds; see discussion in GH#39424
warnings.warn(
"The behavior of indexing on a MultiIndex with a nested "
"sequence of labels is deprecated and will change in a "
"future version. `series.loc[label, sequence]` will "
"raise if any members of 'sequence' are not present in "
"the index's second level. To retain the old behavior, "
"use `series.index.isin(sequence, level=1)`",
# TODO: how to opt in to the future behavior?
# TODO: how to handle IntervalIndex level? (no test cases)
FutureWarning,
stacklevel=7,
)
continue
else:
idxrs = _convert_to_indexer(item_lvl_indexer)
Expand Down
15 changes: 11 additions & 4 deletions pandas/tests/indexing/multiindex/test_loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,14 +398,21 @@ def test_loc_getitem_duplicates_multiindex_missing_indexers(indexer, pos):
idx = MultiIndex.from_product(
[["A", "B", "C"], ["foo", "bar", "baz"]], names=["one", "two"]
)
s = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
expected = s.iloc[pos]
ser = Series(np.arange(9, dtype="int64"), index=idx).sort_index()
expected = ser.iloc[pos]

if expected.size == 0 and indexer != []:
with pytest.raises(KeyError, match=str(indexer)):
s.loc[indexer]
ser.loc[indexer]
else:
result = s.loc[indexer]
warn = None
msg = "MultiIndex with a nested sequence"
if indexer == (slice(None), ["foo", "bah"]):
# "bah" is not in idx.levels[1], so it is currently ignored; in a future version this will raise KeyError
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
result = ser.loc[indexer]
tm.assert_series_equal(result, expected)


Expand Down
36 changes: 33 additions & 3 deletions pandas/tests/io/formats/style/test_style.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,10 +627,27 @@ def test_applymap_subset(self, slice_):
def test_applymap_subset_multiindex(self, slice_):
# GH 19861
# edited for GH 33562
warn = None
msg = "indexing on a MultiIndex with a nested sequence of labels"
if (
isinstance(slice_[-1], tuple)
and isinstance(slice_[-1][-1], list)
and "C" in slice_[-1][-1]
):
warn = FutureWarning
elif (
isinstance(slice_[0], tuple)
and isinstance(slice_[0][1], list)
and 3 in slice_[0][1]
):
warn = FutureWarning

idx = MultiIndex.from_product([["a", "b"], [1, 2]])
col = MultiIndex.from_product([["x", "y"], ["A", "B"]])
df = DataFrame(np.random.rand(4, 4), columns=col, index=idx)
df.style.applymap(lambda x: "color: red;", subset=slice_).render()

with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
df.style.applymap(lambda x: "color: red;", subset=slice_).render()

def test_applymap_subset_multiindex_code(self):
# https://github.com/pandas-dev/pandas/issues/25858
Expand Down Expand Up @@ -1438,6 +1455,19 @@ def test_non_reducing_multi_slice_on_multiindex(self, slice_):
idxs = MultiIndex.from_product([["U", "V"], ["W", "X"], ["Y", "Z"]])
df = DataFrame(np.arange(64).reshape(8, 8), columns=cols, index=idxs)

expected = df.loc[slice_]
result = df.loc[non_reducing_slice(slice_)]
msg = "indexing on a MultiIndex with a nested sequence of labels"
warn = None
for lvl in [0, 1]:
key = slice_[lvl]
if isinstance(key, tuple):
for subkey in key:
if isinstance(subkey, list) and "-" in subkey:
# not present in the index level, ignored, will raise in future
warn = FutureWarning

with tm.assert_produces_warning(warn, match=msg):
expected = df.loc[slice_]

with tm.assert_produces_warning(warn, match=msg):
result = df.loc[non_reducing_slice(slice_)]
tm.assert_frame_equal(result, expected)