Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: Have MultiIndex constructors always return a MI #17236

Merged
merged 3 commits into from
Aug 30, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,30 @@ named ``.isna()`` and ``.notna()``, these are included for classes ``Categorical

The configuration option ``pd.options.mode.use_inf_as_null`` is deprecated, and ``pd.options.mode.use_inf_as_na`` is added as a replacement.

.. _whatsnew_0210.api.multiindex_single:

MultiIndex Constructor with a Single Level
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The ``MultiIndex`` constructors no longer squeeze a ``MultiIndex`` with a
single level down to a regular ``Index``. This affects all the
``MultiIndex`` constructors. (:issue:`17178`)

Previous behavior:

.. code-block:: ipython

In [2]: pd.MultiIndex.from_tuples([('a',), ('b',)])
Out[2]: Index(['a', 'b'], dtype='object')

A single level is no longer special-cased. It behaves exactly as if you had
two or more levels, so a :class:`MultiIndex` is always returned from all of the
``MultiIndex`` constructors:

.. ipython:: python

pd.MultiIndex.from_tuples([('a',), ('b',)])

.. _whatsnew_0210.api:

Other API Changes
Expand Down
11 changes: 6 additions & 5 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@
_dict_compat,
standardize_mapping)
from pandas.core.generic import NDFrame, _shared_docs
from pandas.core.index import Index, MultiIndex, _ensure_index
from pandas.core.index import (Index, MultiIndex, _ensure_index,
_ensure_index_from_sequences)
from pandas.core.indexing import (maybe_droplevels, convert_to_index_sliceable,
check_bool_indexer)
from pandas.core.internals import (BlockManager,
Expand Down Expand Up @@ -1155,9 +1156,9 @@ def from_records(cls, data, index=None, exclude=None, columns=None,
else:
try:
to_remove = [arr_columns.get_loc(field) for field in index]

result_index = MultiIndex.from_arrays(
[arrays[i] for i in to_remove], names=index)
index_data = [arrays[i] for i in to_remove]
result_index = _ensure_index_from_sequences(index_data,
names=index)

exclude.update(index)
except Exception:
Expand Down Expand Up @@ -3000,7 +3001,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False,
to_remove.append(col)
arrays.append(level)

index = MultiIndex.from_arrays(arrays, names=names)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same (or at least that's the intent); otherwise maybe should have an option / another helper function to avoid this repetition of code.

index = _ensure_index_from_sequences(arrays, names)

if verify_integrity and not index.is_unique:
duplicates = index.get_duplicates()
Expand Down
12 changes: 8 additions & 4 deletions pandas/core/indexes/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from pandas.core.indexes.base import (Index, _new_Index, # noqa
_ensure_index, _get_na_value,
InvalidIndexError)
from pandas.core.indexes.base import (Index,
_new_Index,
_ensure_index,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this shouldn't need a noqa (if it's line length, break it on the parens)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think some of these are unused and just there to export as part of the API.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

k

_ensure_index_from_sequences,
_get_na_value,
InvalidIndexError) # noqa
from pandas.core.indexes.category import CategoricalIndex # noqa
from pandas.core.indexes.multi import MultiIndex # noqa
from pandas.core.indexes.interval import IntervalIndex # noqa
Expand All @@ -22,7 +25,8 @@
'InvalidIndexError', 'TimedeltaIndex',
'PeriodIndex', 'DatetimeIndex',
'_new_Index', 'NaT',
'_ensure_index', '_get_na_value', '_get_combined_index',
'_ensure_index', '_ensure_index_from_sequences', '_get_na_value',
'_get_combined_index',
'_get_objs_combined_axis', '_union_indexes',
'_get_consensus_names',
'_all_indexes_same']
Expand Down
69 changes: 69 additions & 0 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4012,7 +4012,76 @@ def invalid_op(self, other=None):
Index._add_comparison_methods()


def _ensure_index_from_sequences(sequences, names=None):
    """
    Build an index out of one or more sequences of data.

    When exactly one sequence is supplied the result is a flat ``Index``;
    otherwise the sequences are handed to ``MultiIndex.from_arrays``.

    Parameters
    ----------
    sequences : sequence of sequences
        The data for each level of the resulting index.
    names : sequence of str, optional
        One name per entry of ``sequences``. For a single sequence only
        the first name is used.

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> _ensure_index_from_sequences([[1, 2, 3]], names=['name'])
    Int64Index([1, 2, 3], dtype='int64', name='name')

    >>> _ensure_index_from_sequences([['a', 'a'], ['a', 'b']],
    ...                              names=['L1', 'L2'])
    MultiIndex(levels=[['a'], ['a', 'b']],
               labels=[[0, 0], [0, 1]],
               names=['L1', 'L2'])

    See Also
    --------
    _ensure_index
    """
    # Imported inside the function rather than at module level.
    from .multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Exactly one sequence: produce a flat Index named after the first
    # (and only relevant) entry of ``names``, if any were given.
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)


def _ensure_index(index_like, copy=False):
"""
Ensure that we have an index from some index-like object

Parameters
----------
index : sequence
An Index or other sequence
copy : bool

Returns
-------
index : Index or MultiIndex

Examples
--------
>>> _ensure_index(['a', 'b'])
Index(['a', 'b'], dtype='object')

>>> _ensure_index([('a', 'a'), ('b', 'c')])
Index([('a', 'a'), ('b', 'c')], dtype='object')

>>> _ensure_index([['a', 'a'], ['b', 'c']])
MultiIndex(levels=[['a'], ['b', 'c']],
labels=[[0, 0], [0, 1]])

See Also
--------
_ensure_index_from_sequences
"""
if isinstance(index_like, Index):
if copy:
index_like = index_like.copy()
Expand Down
10 changes: 0 additions & 10 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,6 @@ def __new__(cls, levels=None, labels=None, sortorder=None, names=None,
raise ValueError('Length of levels and labels must be the same.')
if len(levels) == 0:
raise ValueError('Must pass non-zero number of levels/labels')
if len(levels) == 1:
if names:
name = names[0]
else:
name = None
return Index(levels[0], name=name, copy=True).take(labels[0])

result = object.__new__(MultiIndex)

Expand Down Expand Up @@ -1084,10 +1078,6 @@ def from_arrays(cls, arrays, sortorder=None, names=None):
MultiIndex.from_product : Make a MultiIndex from cartesian product
of iterables
"""
if len(arrays) == 1:
name = None if names is None else names[0]
return Index(arrays[0], name=name)

# Check if lengths of all arrays are equal or not,
# raise ValueError, if not
for i in range(1, len(arrays)):
Expand Down
21 changes: 15 additions & 6 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@

from pandas.core.frame import _shared_docs
from pandas.util._decorators import Appender
from pandas.core.index import MultiIndex, _get_na_value
from pandas.core.index import Index, MultiIndex, _get_na_value


class _Unstacker(object):
Expand Down Expand Up @@ -311,10 +311,14 @@ def _unstack_multiple(data, clocs):
recons_labels = decons_obs_group_ids(comp_ids, obs_ids, shape, clabels,
xnull=False)

dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)
if rlocs == []:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a comment on why the if is needed

# Everything is in clocs, so the dummy df has a regular index
dummy_index = Index(obs_ids, name='__placeholder__')
else:
dummy_index = MultiIndex(levels=rlevels + [obs_ids],
labels=rlabels + [comp_ids],
names=rnames + ['__placeholder__'],
verify_integrity=False)

if isinstance(data, Series):
dummy = data.copy()
Expand Down Expand Up @@ -446,7 +450,12 @@ def _slow_pivot(index, columns, values):

def unstack(obj, level, fill_value=None):
if isinstance(level, (tuple, list)):
return _unstack_multiple(obj, level)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a comment on why this if is needed here

if len(level) != 1:
# _unstack_multiple only handles MultiIndexes,
# and isn't needed for a single level
return _unstack_multiple(obj, level)
else:
level = level[0]

if isinstance(obj, DataFrame):
if isinstance(obj.index, MultiIndex):
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/sparse/scipy_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def robust_get_level_values(i):
labels_to_i = Series(labels_to_i)
if len(subset) > 1:
labels_to_i.index = MultiIndex.from_tuples(labels_to_i.index)
labels_to_i.index.names = [index.names[i] for i in subset]
labels_to_i.index.names = [index.names[i] for i in subset]
else:
labels_to_i.index = Index(x[0] for x in labels_to_i.index)
labels_to_i.index.name = index.names[subset[0]]

labels_to_i.name = 'value'
return (labels_to_i)

Expand Down
7 changes: 6 additions & 1 deletion pandas/core/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,12 @@ def cons_row(x):

if expand:
result = list(result)
return MultiIndex.from_tuples(result, names=name)
out = MultiIndex.from_tuples(result, names=name)
if out.nlevels == 1:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

a comment on why this is needed

# We had all tuples of length-one, which are
# better represented as a regular Index.
out = out.get_level_values(0)
return out
else:
return Index(result, name=name)
else:
Expand Down
13 changes: 7 additions & 6 deletions pandas/io/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
is_scalar, is_categorical_dtype)
from pandas.core.dtypes.missing import isna
from pandas.core.dtypes.cast import astype_nansafe
from pandas.core.index import Index, MultiIndex, RangeIndex
from pandas.core.index import (Index, MultiIndex, RangeIndex,
_ensure_index_from_sequences)
from pandas.core.series import Series
from pandas.core.frame import DataFrame
from pandas.core.categorical import Categorical
Expand Down Expand Up @@ -1444,7 +1445,8 @@ def _agg_index(self, index, try_parse_dates=True):
arr, _ = self._infer_types(arr, col_na_values | col_na_fvalues)
arrays.append(arr)

index = MultiIndex.from_arrays(arrays, names=self.index_names)
names = self.index_names
index = _ensure_index_from_sequences(arrays, names)

return index

Expand Down Expand Up @@ -1808,7 +1810,7 @@ def read(self, nrows=None):
try_parse_dates=True)
arrays.append(values)

index = MultiIndex.from_arrays(arrays)
index = _ensure_index_from_sequences(arrays)

if self.usecols is not None:
names = self._filter_usecols(names)
Expand Down Expand Up @@ -3138,9 +3140,8 @@ def _get_empty_meta(columns, index_col, index_names, dtype=None):
if index_col is None or index_col is False:
index = Index([])
else:
index = [Series([], dtype=dtype[index_name])
for index_name in index_names]
index = MultiIndex.from_arrays(index, names=index_names)
data = [Series([], dtype=dtype[name]) for name in index_names]
index = _ensure_index_from_sequences(data, names=index_names)
index_col.sort()
for i, n in enumerate(index_col):
columns.pop(n - i)
Expand Down
18 changes: 17 additions & 1 deletion pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
DataFrame, Float64Index, Int64Index,
CategoricalIndex, DatetimeIndex, TimedeltaIndex,
PeriodIndex, isna)
from pandas.core.index import _get_combined_index
from pandas.core.index import _get_combined_index, _ensure_index_from_sequences
from pandas.util.testing import assert_almost_equal
from pandas.compat.numpy import np_datetime64_compat

Expand Down Expand Up @@ -2112,3 +2112,19 @@ def test_intersect_str_dates(self):
res = i2.intersection(i1)

assert len(res) == 0


class TestIndexUtils(object):
    """Tests for the ``_ensure_index_from_sequences`` helper."""

    @pytest.mark.parametrize('data, names, expected', [
        # One sequence -> flat Index, unnamed and named.
        (
            [[1, 2, 3]],
            None,
            Index([1, 2, 3]),
        ),
        (
            [[1, 2, 3]],
            ['name'],
            Index([1, 2, 3], name='name'),
        ),
        # Two sequences -> MultiIndex, unnamed and named.
        (
            [['a', 'a'], ['c', 'd']],
            None,
            MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]]),
        ),
        (
            [['a', 'a'], ['c', 'd']],
            ['L1', 'L2'],
            MultiIndex([['a'], ['c', 'd']], [[0, 0], [0, 1]],
                       names=['L1', 'L2']),
        ),
    ])
    def test_ensure_index_from_sequences(self, data, names, expected):
        """The helper's output must equal the expected index."""
        actual = _ensure_index_from_sequences(data, names)
        tm.assert_index_equal(actual, expected)
20 changes: 9 additions & 11 deletions pandas/tests/indexes/test_multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -537,15 +537,12 @@ def test_astype(self):
self.index.astype(np.dtype(int))

def test_constructor_single_level(self):
single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(single_level, Index)
assert not isinstance(single_level, MultiIndex)
assert single_level.name == 'first'

single_level = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]])
assert single_level.name is None
result = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux']],
labels=[[0, 1, 2, 3]], names=['first'])
assert isinstance(result, MultiIndex)
expected = Index(['foo', 'bar', 'baz', 'qux'], name='first')
tm.assert_index_equal(result.levels[0], expected)
assert result.names == ['first']

def test_constructor_no_levels(self):
tm.assert_raises_regex(ValueError, "non-zero number "
Expand Down Expand Up @@ -768,8 +765,9 @@ def test_from_arrays_empty(self):

# 1 level
result = MultiIndex.from_arrays(arrays=[[]], names=['A'])
assert isinstance(result, MultiIndex)
expected = Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# N levels
for N in [2, 3]:
Expand Down Expand Up @@ -830,7 +828,7 @@ def test_from_product_empty(self):
# 1 level
result = MultiIndex.from_product([[]], names=['A'])
expected = pd.Index([], name='A')
tm.assert_index_equal(result, expected)
tm.assert_index_equal(result.levels[0], expected)

# 2 levels
l1 = [[], ['foo', 'bar', 'baz'], []]
Expand Down
4 changes: 4 additions & 0 deletions pandas/util/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1909,7 +1909,11 @@ def keyfunc(x):

# convert tuples to index
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here

if nentries == 1:
# we have a single level of tuples, i.e. a regular Index
index = Index(tuples[0], name=names[0])
elif nlevels == 1:
name = None if names is None else names[0]
index = Index((x[0] for x in tuples), name=name)
else:
index = MultiIndex.from_tuples(tuples, names=names)
return index
Expand Down