From e01b88bbc1b2b46574b9294bc7b7004e0026a464 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 18 May 2018 08:10:21 +0200 Subject: [PATCH 1/5] API: add droplevel() to flat indexes, for compatibility with MultiIndex closes #21115 --- doc/source/whatsnew/v0.23.1.txt | 1 + pandas/core/indexes/base.py | 54 +++++++++++++++++++++++++++++++++ pandas/core/indexes/multi.py | 46 ---------------------------- 3 files changed, 55 insertions(+), 46 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index 9382d74f95295..0071f315851df 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -65,6 +65,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`Series.reset_index` where appropriate error was not raised with an invalid level name (:issue:`20925`) +- :meth:`Index.droplevel` is now also implemented for flat indexes, for compatibility with MultiIndex (:issue:`21115`) - I/O diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index df39eb5fd8312..f79288c167356 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -3158,6 +3158,60 @@ def _get_level_values(self, level): get_level_values = _get_level_values + def droplevel(self, level=0): + """ + Return index with requested level(s) removed. If resulting index has + only 1 level left, the result will be of Index type, not MultiIndex. + + .. versionadded:: 0.23.1 (support for non-MultiIndex) + + Parameters + ---------- + level : int, str, or list-like, default 0 + If a string is given, must be the name of a level. + If list-like, elements must be names or indexes of levels.
+ + Returns + ------- + index : Index or MultiIndex + """ + if not isinstance(level, (tuple, list)): + level = [level] + + levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] + + if len(level) == 0: + return self + if len(level) >= self.nlevels: + raise ValueError("Cannot remove {} levels from an index with {} " + "levels: at least one level must be " + "left.".format(len(level), self.nlevels)) + # The two checks above guarantee that here self is a MultiIndex + + new_levels = list(self.levels) + new_labels = list(self.labels) + new_names = list(self.names) + + for i in levnums: + new_levels.pop(i) + new_labels.pop(i) + new_names.pop(i) + + if len(new_levels) == 1: + + # set nan if needed + mask = new_labels[0] == -1 + result = new_levels[0].take(new_labels[0]) + if mask.any(): + result = result.putmask(mask, np.nan) + + result.name = new_names[0] + return result + else: + from .multi import MultiIndex + return MultiIndex(levels=new_levels, labels=new_labels, + names=new_names, verify_integrity=False) + _index_shared_docs['get_indexer'] = """ Compute indexer and mask for new index given the current index. The indexer should be then used as an input to ndarray.take to align the diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index fbcf06a28c1e5..ea0fab7e17648 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1761,52 +1761,6 @@ def _drop_from_level(self, labels, level): return self[mask] - def droplevel(self, level=0): - """ - Return Index with requested level removed. If MultiIndex has only 2 - levels, the result will be of Index type not MultiIndex. 
- - Parameters - ---------- - level : int/level name or list thereof - - Notes - ----- - Does not check if result index is unique or not - - Returns - ------- - index : Index or MultiIndex - """ - levels = level - if not isinstance(levels, (tuple, list)): - levels = [level] - - new_levels = list(self.levels) - new_labels = list(self.labels) - new_names = list(self.names) - - levnums = sorted(self._get_level_number(lev) for lev in levels)[::-1] - - for i in levnums: - new_levels.pop(i) - new_labels.pop(i) - new_names.pop(i) - - if len(new_levels) == 1: - - # set nan if needed - mask = new_labels[0] == -1 - result = new_levels[0].take(new_labels[0]) - if mask.any(): - result = result.putmask(mask, np.nan) - - result.name = new_names[0] - return result - else: - return MultiIndex(levels=new_levels, labels=new_labels, - names=new_names, verify_integrity=False) - def swaplevel(self, i=-2, j=-1): """ Swap level i with level j. From fc6a29783383a6f442e3d879c00dcd7658be60f5 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 18 May 2018 08:16:17 +0200 Subject: [PATCH 2/5] CLN: remove unneeded checks --- pandas/core/frame.py | 5 ++--- pandas/core/series.py | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 77a67c048a48d..0986ed289e603 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4096,9 +4096,8 @@ def _maybe_casted_values(index, labels=None): if not isinstance(level, (tuple, list)): level = [level] level = [self.index._get_level_number(lev) for lev in level] - if isinstance(self.index, MultiIndex): - if len(level) < self.index.nlevels: - new_index = self.index.droplevel(level) + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) if not drop: if isinstance(self.index, MultiIndex): diff --git a/pandas/core/series.py b/pandas/core/series.py index 6d396e845219e..1716116112613 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1199,9 +1199,8 
@@ def reset_index(self, level=None, drop=False, name=None, inplace=False): if not isinstance(level, (tuple, list)): level = [level] level = [self.index._get_level_number(lev) for lev in level] - if isinstance(self.index, MultiIndex): - if len(level) < self.index.nlevels: - new_index = self.index.droplevel(level) + if len(level) < self.index.nlevels: + new_index = self.index.droplevel(level) if inplace: self.index = new_index From 56f1568a1dc17043bcb90f3a240abf61160d44b2 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 18 May 2018 10:11:06 +0200 Subject: [PATCH 3/5] TST: Add droplevel tests for corner cases --- pandas/tests/indexes/test_base.py | 19 +++++++++++++++++++ pandas/tests/indexes/test_multi.py | 12 +++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f4fa547574b9e..7fc00ed8f5411 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -245,6 +245,25 @@ def test_constructor_int_dtype_nan(self): result = Index(data, dtype='float') tm.assert_index_equal(result, expected) + def test_droplevel(self, indices): + # GH 21115 + if isinstance(indices, MultiIndex): + # Tested separately in test_multi.py + return + + assert indices.droplevel([]).equals(indices) + + for level in indices.name, [indices.name]: + if isinstance(indices.name, tuple) and level is indices.name: + # GH 21121 : droplevel with tuple name + continue + with pytest.raises(ValueError): + indices.droplevel(level) + + for level in 'wrong', ['wrong']: + with pytest.raises(KeyError): + indices.droplevel(level) + @pytest.mark.parametrize("dtype", ['int64', 'uint64']) def test_constructor_int_dtype_nan_raises(self, dtype): # see gh-15187 diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 37f70090c179f..e43a1f062ef9d 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -2078,7 +2078,7 @@ 
def test_droplevel_with_names(self): expected = index.droplevel(1) assert dropped.equals(expected) - def test_droplevel_multiple(self): + def test_droplevel_list(self): index = MultiIndex( levels=[Index(lrange(4)), Index(lrange(4)), Index(lrange(4))], labels=[np.array([0, 0, 1, 2, 2, 2, 3, 3]), np.array( @@ -2089,6 +2089,16 @@ def test_droplevel_multiple(self): expected = index[:2].droplevel(2).droplevel(0) assert dropped.equals(expected) + dropped = index[:2].droplevel([]) + expected = index[:2] + assert dropped.equals(expected) + + with pytest.raises(ValueError): + index[:2].droplevel(['one', 'two', 'three']) + + with pytest.raises(KeyError): + index[:2].droplevel(['one', 'four']) + def test_drop_not_lexsorted(self): # GH 12078 From 5cf4957a2cc8429170742dd1e6435ddb8c323811 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Fri, 18 May 2018 10:13:32 +0200 Subject: [PATCH 4/5] CLN: replace lambdas with defs --- pandas/core/series.py | 3 ++- pandas/tests/indexes/test_multi.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 1716116112613..7947ce576dc6f 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3176,7 +3176,8 @@ def apply(self, func, convert_dtype=True, args=(), **kwds): # handle ufuncs and lambdas if kwds or args and not isinstance(func, np.ufunc): - f = lambda x: func(x, *args, **kwds) + def f(x): + return func(x, *args, **kwds) else: f = func diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e43a1f062ef9d..c9f6bc9151d00 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -99,7 +99,8 @@ def test_where_array_like(self): cond = [False, True] for klass in klasses: - f = lambda: i.where(klass(cond)) + def f(): + return i.where(klass(cond)) pytest.raises(NotImplementedError, f) def test_repeat(self): @@ -2415,7 +2416,8 @@ def check(nlevels, with_nulls): # with a dup if with_nulls: - f = 
lambda a: np.insert(a, 1000, a[0]) + def f(a): + return np.insert(a, 1000, a[0]) labels = list(map(f, labels)) index = MultiIndex(levels=levels, labels=labels) else: From 57a5332f3125a4dfbf165a454325fa611a5a3429 Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Mon, 21 May 2018 13:14:00 +0200 Subject: [PATCH 5/5] DOC: New section in api.rst - compatibility with MultiIndex --- doc/source/api.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/api.rst b/doc/source/api.rst index d00e5511f1100..4faec93490fde 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -1459,7 +1459,6 @@ Modifying and Computations Index.is_floating Index.is_integer Index.is_interval - Index.is_lexsorted_for_tuple Index.is_mixed Index.is_numeric Index.is_object @@ -1471,11 +1470,19 @@ Modifying and Computations Index.where Index.take Index.putmask - Index.set_names Index.unique Index.nunique Index.value_counts +Compatibility with MultiIndex +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +.. autosummary:: + :toctree: generated/ + + Index.set_names + Index.is_lexsorted_for_tuple + Index.droplevel + Missing Values ~~~~~~~~~~~~~~ .. autosummary::