Skip to content

Commit

Permalink
Rebased version of pandas-dev#22486
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari committed Sep 18, 2018
1 parent 3edbaea commit e997faf
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 13 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,7 @@ Other API Changes
- :class:`pandas.io.formats.style.Styler` supports a ``number-format`` property when using :meth:`~pandas.io.formats.style.Styler.to_excel` (:issue:`22015`)
- :meth:`DataFrame.corr` and :meth:`Series.corr` now raise a ``ValueError`` along with a helpful error message instead of a ``KeyError`` when supplied with an invalid method (:issue:`22298`)
- :meth:`shift` will now always return a copy, instead of the previous behaviour of returning self when shifting by 0 (:issue:`22397`)
- :meth:`DataFrame.set_index` now raises a ``TypeError`` for incorrect types, has an improved ``KeyError`` message, and will not fail on duplicate column names with ``drop=True`` (:issue:`22484`)

.. _whatsnew_0240.deprecations:

Expand Down
18 changes: 17 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@
is_named_tuple)
from pandas.core.dtypes.concat import _get_sliced_frame_result_type
from pandas.core.dtypes.missing import isna, notna

from pandas.core.dtypes.generic import ABCIndexClass, ABCMultiIndex, ABCSeries

from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import (Index, MultiIndex, ensure_index,
Expand Down Expand Up @@ -3898,6 +3898,22 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
if not isinstance(keys, list):
keys = [keys]

missing = []
for x in keys:
if not (is_scalar(x) or isinstance(x, tuple)):
if not isinstance(x, (ABCSeries, ABCIndexClass, ABCMultiIndex,
list, np.ndarray)):
raise TypeError('keys may only contain a combination of '
'the following: valid column keys, '
'Series, Index, MultiIndex, list or '
'np.ndarray')
else:
if x not in self:
missing.append(x)

if missing:
raise KeyError('{}'.format(missing))

vi = verify_integrity
return super(DataFrame, self).set_index(keys=keys, drop=drop,
append=append, inplace=inplace,
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
raise ValueError('Index has duplicate keys: {dup}'.format(
dup=duplicates))

for c in to_remove:
# iterate over a set so that a column name appearing more than once in
# to_remove is deleted only once when dropping
for c in set(to_remove):
del obj[c]

# clear up memory usage
Expand Down
34 changes: 23 additions & 11 deletions pandas/tests/frame/test_alter_axes.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,18 +186,19 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,

# == gives ambiguous Boolean for Series
if drop and keys[0] is 'A' and keys[1] is 'A':
with tm.assert_raises_regex(KeyError, '.*'):
df.set_index(keys, drop=drop, append=append)
# can't drop same column twice
first_drop = False
else:
result = df.set_index(keys, drop=drop, append=append)
first_drop = drop

# to test against already-tested behavior, we add sequentially,
# hence second append always True; must wrap in list, otherwise
# list-box will be illegal
expected = df.set_index([keys[0]], drop=drop, append=append)
expected = expected.set_index([keys[1]], drop=drop, append=True)
# to test against already-tested behaviour, we add sequentially,
# hence second append always True; must wrap in list, otherwise
# list-box will be illegal
expected = df.set_index([keys[0]], drop=first_drop, append=append)
expected = expected.set_index([keys[1]], drop=drop, append=True)

tm.assert_frame_equal(result, expected)
result = df.set_index(keys, drop=drop, append=append)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize('append', [True, False])
@pytest.mark.parametrize('drop', [True, False])
Expand Down Expand Up @@ -229,13 +230,24 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
def test_set_index_raise(self, frame_of_index_cols, drop, append):
    """Check that ``DataFrame.set_index`` rejects invalid keys.

    Missing column labels must raise a ``KeyError`` whose message names the
    missing labels; keys of an unsupported type (here: ``set``) must raise
    a ``TypeError``.
    """
    df = frame_of_index_cols

    # The brackets are escaped so the pattern matches the literal list repr
    # in the error message. Unescaped, ``[...]`` is a regex character class
    # that matches a single character, which would make the check vacuous.
    with tm.assert_raises_regex(KeyError, r"\['foo', 'bar', 'baz'\]"):
        # column names are A-E
        df.set_index(['foo', 'bar', 'baz'], drop=drop, append=append)

    # non-existent key in list with arrays
    with tm.assert_raises_regex(KeyError, 'X'):
        df.set_index([df['A'], df['B'], 'X'], drop=drop, append=append)

    rgx = 'keys may only contain a combination of the following:.*'
    # forbidden type, e.g. set
    with tm.assert_raises_regex(TypeError, rgx):
        df.set_index(set(df['A']), drop=drop, append=append)

    # forbidden type in list, e.g. set
    with tm.assert_raises_regex(TypeError, rgx):
        df.set_index(['A', df['A'], set(df['A'])],
                     drop=drop, append=append)

def test_construction_with_categorical_index(self):
ci = tm.makeCategoricalIndex(10)
ci.name = 'B'
Expand Down

0 comments on commit e997faf

Please sign in to comment.