From 6ed5edd94f21ede67c9031c3356f0b197b40b110 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Aug 2017 12:53:03 -0500 Subject: [PATCH 1/3] API: Have MultiIndex constructors return MI This removes the special case for MultiIndex constructors returning an Index if all the levels are length-1. Now this will return a MultiIndex with a single level. This is a backwards incompatible change, with no clear method for deprecation, so we're making a clean break. Closes #17178 --- doc/source/whatsnew/v0.21.0.txt | 21 +++++++++++++++++++++ pandas/core/frame.py | 11 ++++++----- pandas/core/indexes/api.py | 12 ++++++++---- pandas/core/indexes/base.py | 27 +++++++++++++++++++++++++++ pandas/core/indexes/multi.py | 10 ---------- pandas/core/reshape/reshape.py | 18 ++++++++++++------ pandas/core/sparse/scipy_sparse.py | 6 +++++- pandas/core/strings.py | 5 ++++- pandas/io/parsers.py | 21 +++++++++++++++------ pandas/tests/indexes/test_base.py | 18 +++++++++++++++++- pandas/tests/indexes/test_multi.py | 20 +++++++++----------- pandas/util/testing.py | 3 +++ 12 files changed, 127 insertions(+), 45 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 33b7e128ef8bf..80c6eb05d7654 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -273,6 +273,27 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement. +.. _whatsnew_210.api.multiindex_single: + +MultiIndex Constructor with a Single Level +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previous versions would automatically squeeze a ``MultiIndex`` with length-one +``levels`` down to an ``Index``: + +.. code-block:: ipython + + In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)]) + Out[2]: Index(['a', 'b'], dtype='object') + +Length 1 levels are no longer special-cased. 
They behave exactly as if you had +length 2+ levels, so a :class:`MultiIndex` is always returned from all of the +``MultiIndex`` constructors: + +.. ipython:: python + + pd.MultiIndex.from_tuples([('a',), ('b',)]) + .. _whatsnew_0210.api: Other API Changes diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b5b3df64d24c0..20b440be5c869 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -67,7 +67,8 @@ _dict_compat, standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs -from pandas.core.index import Index, MultiIndex, _ensure_index +from pandas.core.index import (Index, MultiIndex, _ensure_index, + _index_from_sequences) from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, check_bool_indexer) from pandas.core.internals import (BlockManager, @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None, else: try: to_remove = [arr_columns.get_loc(field) for field in index] - - result_index = MultiIndex.from_arrays( - [arrays[i] for i in to_remove], names=index) + index_data = [arrays[i] for i in to_remove] + result_index = _index_from_sequences(index_data, + names=index) exclude.update(index) except Exception: @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove.append(col) arrays.append(level) - index = MultiIndex.from_arrays(arrays, names=names) + index = _index_from_sequences(arrays, names) if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 323d50166e7b6..a7860440def7c 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,6 +1,9 @@ -from pandas.core.indexes.base import (Index, _new_Index, # noqa - _ensure_index, _get_na_value, - InvalidIndexError) +from pandas.core.indexes.base import (Index, + _new_Index, + _ensure_index, + _index_from_sequences, + _get_na_value, + InvalidIndexError) # noqa from 
pandas.core.indexes.category import CategoricalIndex # noqa from pandas.core.indexes.multi import MultiIndex # noqa from pandas.core.indexes.interval import IntervalIndex # noqa @@ -22,7 +25,8 @@ 'InvalidIndexError', 'TimedeltaIndex', 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', - '_ensure_index', '_get_na_value', '_get_combined_index', + '_ensure_index', '_index_from_sequences', '_get_na_value', + '_get_combined_index', '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', '_all_indexes_same'] diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 31cf1e48b8529..3708d4f5fd22b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4012,6 +4012,33 @@ def invalid_op(self, other=None): Index._add_comparison_methods() +def _index_from_sequences(sequences, names=None): + """Construct an index from sequences of data. + + A single sequence returns an Index. + Many sequences returns a MultiIndex. + + Examples + -------- + + >>> _index_from_sequences([[1, 2, 3]], names=['name']) + Int64Index([1, 2, 3], dtype='int64', name='name') + + >>> _index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2']) + MultiIndex(levels=[['a'], ['a', 'b']], + labels=[[0, 0], [0, 1]], + names=['L1', 'L2']) + """ + from .multi import MultiIndex + + if len(sequences) == 1: + if names is not None: + names = names[0] + return Index(sequences[0], name=names) + else: + return MultiIndex.from_arrays(sequences, names=names) + + def _ensure_index(index_like, copy=False): if isinstance(index_like, Index): if copy: diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index ea45b4700172f..d7d5b6d128a2c 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None, raise ValueError('Length of levels and labels must be the same.') if len(levels) == 0: raise ValueError('Must pass non-zero number of 
levels/labels') - if len(levels) == 1: - if names: - name = names[0] - else: - name = None - return Index(levels[0], name=name, copy=True).take(labels[0]) result = object.__new__(MultiIndex) @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None): MultiIndex.from_product : Make a MultiIndex from cartesian product of iterables """ - if len(arrays) == 1: - name = None if names is None else names[0] - return Index(arrays[0], name=name) - # Check if lengths of all arrays are equal or not, # raise ValueError, if not for i in range(1, len(arrays)): diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 455da9246783c..dc45bf70992ee 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -31,7 +31,7 @@ from pandas.core.frame import _shared_docs from pandas.util._decorators import Appender -from pandas.core.index import MultiIndex, _get_na_value +from pandas.core.index import Index, MultiIndex, _get_na_value class _Unstacker(object): @@ -311,10 +311,13 @@ def _unstack_multiple(data, clocs): recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels, xnull=False) - dummy_index = MultiIndex(levels=rlevels + [obs_ids], - labels=rlabels + [comp_ids], - names=rnames + ['__placeholder__'], - verify_integrity=False) + if rlocs == []: + dummy_index = Index(obs_ids, name='__placeholder__') + else: + dummy_index = MultiIndex(levels=rlevels + [obs_ids], + labels=rlabels + [comp_ids], + names=rnames + ['__placeholder__'], + verify_integrity=False) if isinstance(data, Series): dummy = data.copy() @@ -446,7 +449,10 @@ def _slow_pivot(index, columns, values): def unstack(obj, level, fill_value=None): if isinstance(level, (tuple, list)): - return _unstack_multiple(obj, level) + if len(level) == 1: + level = level[0] + else: + return _unstack_multiple(obj, level) if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): diff --git a/pandas/core/sparse/scipy_sparse.py 
b/pandas/core/sparse/scipy_sparse.py index ea108e3e89935..d2b9583d8efe5 100644 --- a/pandas/core/sparse/scipy_sparse.py +++ b/pandas/core/sparse/scipy_sparse.py @@ -71,7 +71,11 @@ def robust_get_level_values(i): labels_to_i = Series(labels_to_i) if len(subset) > 1: labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index) - labels_to_i.index.names = [index.names[i] for i in subset] + labels_to_i.index.names = [index.names[i] for i in subset] + else: + labels_to_i.index = Index(x[0] for x in labels_to_i.index) + labels_to_i.index.name = index.names[subset[0]] + labels_to_i.name = 'value' return (labels_to_i) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 2f95e510bba5e..8efb6d7959954 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1452,7 +1452,10 @@ def cons_row(x): if expand: result = list(result) - return MultiIndex.from_tuples(result, names=name) + out = MultiIndex.from_tuples(result, names=name) + if out.nlevels == 1: + out = out.get_level_values(0) + return out else: return Index(result, name=name) else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index a9821be3fa5e2..9bb734070ee75 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -23,7 +23,8 @@ is_scalar, is_categorical_dtype) from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe -from pandas.core.index import Index, MultiIndex, RangeIndex +from pandas.core.index import (Index, MultiIndex, RangeIndex, + _index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical @@ -1444,7 +1445,16 @@ def _agg_index(self, index, try_parse_dates=True): arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues) arrays.append(arr) - index = MultiIndex.from_arrays(arrays, names=self.index_names) + names = self.index_names + index = _index_from_sequences(arrays, names) + if len(arrays) > 1: + index = 
MultiIndex.from_arrays(arrays, names=self.index_names) + else: + if self.index_names is None: + name = None + else: + name = self.index_names[0] + index = Index(arrays[0], name=name) return index @@ -1808,7 +1818,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = MultiIndex.from_arrays(arrays) + index = _index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3138,9 +3148,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): if index_col is None or index_col is False: index = Index([]) else: - index = [Series([], dtype=dtype[index_name]) - for index_name in index_names] - index = MultiIndex.from_arrays(index, names=index_names) + data = [Series([], dtype=dtype[name]) for name in index_names] + index = _index_from_sequences(data, names=index_names) index_col.sort() for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 07e98c326bcaa..84efba9857286 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -17,7 +17,7 @@ DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) -from pandas.core.index import _get_combined_index +from pandas.core.index import _get_combined_index, _index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self): res = i2.intersection(i1) assert len(res) == 0 + + +class TestIndexUtils(object): + + @pytest.mark.parametrize('data, names, expected', [ + ([[1, 2, 3]], None, Index([1, 2, 3])), + ([[1, 2, 3]], ['name'], Index([1, 2, 3], name='name')), + ([['a', 'a'], ['c', 'd']], None, + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]])), + ([['a', 'a'], ['c', 'd']], ['L1', 'L2'], + MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], + names=['L1', 'L2'])), + ]) + def 
test_index_from_sequences(self, data, names, expected): + result = _index_from_sequences(data, names) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index c66775f4690cc..798d244468961 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -537,15 +537,12 @@ def test_astype(self): self.index.astype(np.dtype(int)) def test_constructor_single_level(self): - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]], names=['first']) - assert isinstance(single_level, Index) - assert not isinstance(single_level, MultiIndex) - assert single_level.name == 'first' - - single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], - labels=[[0, 1, 2, 3]]) - assert single_level.name is None + result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']], + labels=[[0, 1, 2, 3]], names=['first']) + assert isinstance(result, MultiIndex) + expected = Index(['foo', 'bar', 'baz', 'qux'], name='first') + tm.assert_index_equal(result.levels[0], expected) + assert result.names == ['first'] def test_constructor_no_levels(self): tm.assert_raises_regex(ValueError, "non-zero number " @@ -768,8 +765,9 @@ def test_from_arrays_empty(self): # 1 level result = MultiIndex.from_arrays(arrays=[[]], names=['A']) + assert isinstance(result, MultiIndex) expected = Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # N levels for N in [2, 3]: @@ -830,7 +828,7 @@ def test_from_product_empty(self): # 1 level result = MultiIndex.from_product([[]], names=['A']) expected = pd.Index([], name='A') - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result.levels[0], expected) # 2 levels l1 = [[], ['foo', 'bar', 'baz'], []] diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 5a17cb6d7dc47..b8aee992e60a6 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ 
-1910,6 +1910,9 @@ def keyfunc(x): # convert tuples to index if nentries == 1: index = Index(tuples[0], name=names[0]) + elif nlevels == 1: + name = None if names is None else names[0] + index = Index((x[0] for x in tuples), name=name) else: index = MultiIndex.from_tuples(tuples, names=names) return index From c358e8785ebf6d573cbe8a8b5a59e2cfe2166568 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Aug 2017 12:04:04 -0500 Subject: [PATCH 2/3] fixup! API: Have MultiIndex constructors return MI --- pandas/core/frame.py | 8 +++--- pandas/core/indexes/api.py | 4 +-- pandas/core/indexes/base.py | 46 +++++++++++++++++++++++++++---- pandas/core/reshape/reshape.py | 3 ++ pandas/core/strings.py | 2 ++ pandas/io/parsers.py | 16 +++-------- pandas/tests/indexes/test_base.py | 6 ++-- pandas/util/testing.py | 1 + 8 files changed, 59 insertions(+), 27 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 20b440be5c869..5991ec825c841 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -68,7 +68,7 @@ standardize_mapping) from pandas.core.generic import NDFrame, _shared_docs from pandas.core.index import (Index, MultiIndex, _ensure_index, - _index_from_sequences) + _ensure_index_from_sequences) from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable, check_bool_indexer) from pandas.core.internals import (BlockManager, @@ -1157,8 +1157,8 @@ def from_records(cls, data, index=None, exclude=None, columns=None, try: to_remove = [arr_columns.get_loc(field) for field in index] index_data = [arrays[i] for i in to_remove] - result_index = _index_from_sequences(index_data, - names=index) + result_index = _ensure_index_from_sequences(index_data, + names=index) exclude.update(index) except Exception: @@ -3001,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove.append(col) arrays.append(level) - index = _index_from_sequences(arrays, names) + index = _ensure_index_from_sequences(arrays, 
names) if verify_integrity and not index.is_unique: duplicates = index.get_duplicates() diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index a7860440def7c..d20a0b0a2c73d 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -1,7 +1,7 @@ from pandas.core.indexes.base import (Index, _new_Index, _ensure_index, - _index_from_sequences, + _ensure_index_from_sequences, _get_na_value, InvalidIndexError) # noqa from pandas.core.indexes.category import CategoricalIndex # noqa @@ -25,7 +25,7 @@ 'InvalidIndexError', 'TimedeltaIndex', 'PeriodIndex', 'DatetimeIndex', '_new_Index', 'NaT', - '_ensure_index', '_index_from_sequences', '_get_na_value', + '_ensure_index', '_ensure_index_from_sequences', '_get_na_value', '_get_combined_index', '_get_objs_combined_axis', '_union_indexes', '_get_consensus_names', diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3708d4f5fd22b..bbab41b92f892 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4012,19 +4012,28 @@ def invalid_op(self, other=None): Index._add_comparison_methods() -def _index_from_sequences(sequences, names=None): +def _ensure_index_from_sequences(sequences, names=None): """Construct an index from sequences of data. - A single sequence returns an Index. - Many sequences returns a MultiIndex. + A single sequence returns an Index. Many sequences returns a + MultiIndex. 
+ + Parameters + ---------- + sequences : sequence of sequences + names : sequence of str + + Returns + ------- + index : Index or MultiIndex Examples -------- - - >>> _index_from_sequences([[1, 2, 3]], names=['name']) + >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name']) Int64Index([1, 2, 3], dtype='int64', name='name') - >>> _index_from_sequences([['a', 'a'], ['a', 'b']], names=['L1', 'L2']) + >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']], + names=['L1', 'L2']) MultiIndex(levels=[['a'], ['a', 'b']], labels=[[0, 0], [0, 1]], names=['L1', 'L2']) @@ -4040,6 +4049,31 @@ def _index_from_sequences(sequences, names=None): def _ensure_index(index_like, copy=False): + """ + Ensure that we have an index from some index-like object + + Parameters + ---------- + index : sequence + An Index or other sequence + copy : bool + + Returns + ------- + index : Index or MultiIndex + + Examples + -------- + >>> _ensure_index(['a', 'b']) + Index(['a', 'b'], dtype='object') + + >>> _ensure_index([('a', 'a'), ('b', 'c')]) + Index([('a', 'a'), ('b', 'c')], dtype='object') + + >>> _ensure_index([['a', 'a'], ['b', 'c']]) + MultiIndex(levels=[['a'], ['b', 'c']], + labels=[[0, 0], [0, 1]]) + """ if isinstance(index_like, Index): if copy: index_like = index_like.copy() diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index dc45bf70992ee..42c6b24aebbef 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -312,6 +312,7 @@ def _unstack_multiple(data, clocs): xnull=False) if rlocs == []: + # Everything is in clocs, so the dummy df has a regular index dummy_index = Index(obs_ids, name='__placeholder__') else: dummy_index = MultiIndex(levels=rlevels + [obs_ids], @@ -450,6 +451,8 @@ def _slow_pivot(index, columns, values): def unstack(obj, level, fill_value=None): if isinstance(level, (tuple, list)): if len(level) == 1: + # unstack_multiple only handles MultiIndexes, + # and isn't needed for a single level level = 
level[0] else: return _unstack_multiple(obj, level) diff --git a/pandas/core/strings.py b/pandas/core/strings.py index 8efb6d7959954..48bc2ee05dd68 100644 --- a/pandas/core/strings.py +++ b/pandas/core/strings.py @@ -1454,6 +1454,8 @@ def cons_row(x): result = list(result) out = MultiIndex.from_tuples(result, names=name) if out.nlevels == 1: + # We had all tuples of length-one, which are + # better represented as a regular Index. out = out.get_level_values(0) return out else: diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 9bb734070ee75..8b1a921536a1d 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -24,7 +24,7 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.cast import astype_nansafe from pandas.core.index import (Index, MultiIndex, RangeIndex, - _index_from_sequences) + _ensure_index_from_sequences) from pandas.core.series import Series from pandas.core.frame import DataFrame from pandas.core.categorical import Categorical @@ -1446,15 +1446,7 @@ def _agg_index(self, index, try_parse_dates=True): arrays.append(arr) names = self.index_names - index = _index_from_sequences(arrays, names) - if len(arrays) > 1: - index = MultiIndex.from_arrays(arrays, names=self.index_names) - else: - if self.index_names is None: - name = None - else: - name = self.index_names[0] - index = Index(arrays[0], name=name) + index = _ensure_index_from_sequences(arrays, names) return index @@ -1818,7 +1810,7 @@ def read(self, nrows=None): try_parse_dates=True) arrays.append(values) - index = _index_from_sequences(arrays) + index = _ensure_index_from_sequences(arrays) if self.usecols is not None: names = self._filter_usecols(names) @@ -3149,7 +3141,7 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None): index = Index([]) else: data = [Series([], dtype=dtype[name]) for name in index_names] - index = _index_from_sequences(data, names=index_names) + index = _ensure_index_from_sequences(data, names=index_names) index_col.sort() 
for i, n in enumerate(index_col): columns.pop(n - i) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 84efba9857286..aa32e75ba0d58 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -17,7 +17,7 @@ DataFrame, Float64Index, Int64Index, CategoricalIndex, DatetimeIndex, TimedeltaIndex, PeriodIndex, isna) -from pandas.core.index import _get_combined_index, _index_from_sequences +from pandas.core.index import _get_combined_index, _ensure_index_from_sequences from pandas.util.testing import assert_almost_equal from pandas.compat.numpy import np_datetime64_compat @@ -2125,6 +2125,6 @@ class TestIndexUtils(object): MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]], names=['L1', 'L2'])), ]) - def test_index_from_sequences(self, data, names, expected): - result = _index_from_sequences(data, names) + def test_ensure_index_from_sequences(self, data, names, expected): + result = _ensure_index_from_sequences(data, names) tm.assert_index_equal(result, expected) diff --git a/pandas/util/testing.py b/pandas/util/testing.py index b8aee992e60a6..7dac83953ad8f 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -1909,6 +1909,7 @@ def keyfunc(x): # convert tuples to index if nentries == 1: + # we have a single level of tuples, i.e. 
a regular Index index = Index(tuples[0], name=names[0]) elif nlevels == 1: name = None if names is None else names[0] From 5c8205c0681b486ed73bdcb816b3728bee1a5f78 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Aug 2017 15:26:47 -0500 Subject: [PATCH 3/3] Update for comments --- doc/source/whatsnew/v0.21.0.txt | 7 +++++-- pandas/core/indexes/base.py | 8 ++++++++ pandas/core/reshape/reshape.py | 8 ++++---- 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index 80c6eb05d7654..87896778bea14 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -278,8 +278,11 @@ The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and MultiIndex Constructor with a Single Level ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previous versions would automatically squeeze a ``MultiIndex`` with length-one -``levels`` down to an ``Index``: +The ``MultiIndex`` constructors no longer squeeze a MultiIndex with all +length-one levels down to a regular ``Index``. This affects all the +``MultiIndex`` constructors. (:issue:`17178`) + +Previous behavior: .. 
code-block:: ipython diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index bbab41b92f892..6a30eaefaaae7 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4037,6 +4037,10 @@ def _ensure_index_from_sequences(sequences, names=None): MultiIndex(levels=[['a'], ['a', 'b']], labels=[[0, 0], [0, 1]], names=['L1', 'L2']) + + See Also + -------- + _ensure_index """ from .multi import MultiIndex @@ -4073,6 +4077,10 @@ def _ensure_index(index_like, copy=False): >>> _ensure_index([['a', 'a'], ['b', 'c']]) MultiIndex(levels=[['a'], ['b', 'c']], labels=[[0, 0], [0, 1]]) + + See Also + -------- + _ensure_index_from_sequences """ if isinstance(index_like, Index): if copy: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 42c6b24aebbef..b4abba8026b35 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -450,12 +450,12 @@ def _slow_pivot(index, columns, values): def unstack(obj, level, fill_value=None): if isinstance(level, (tuple, list)): - if len(level) == 1: - # unstack_multiple only handles MultiIndexes, + if len(level) != 1: + # _unstack_multiple only handles MultiIndexes, # and isn't needed for a single level - level = level[0] - else: return _unstack_multiple(obj, level) + else: + level = level[0] if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex):