Skip to content

Commit

Permalink
PERF: Allow Index.to_frame to return RangeIndex columns (#58018)
Browse files Browse the repository at this point in the history
  • Loading branch information
mroeschke authored Mar 28, 2024
1 parent 3904711 commit da80247
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 7 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ Performance improvements
- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
- Performance improvement in :meth:`Index.join` by propagating cached attributes in cases where the result matches one of the inputs (:issue:`57023`)
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
- Performance improvement in :meth:`Index.to_frame` returning :class:`RangeIndex` columns instead of :class:`Index` columns when possible. (:issue:`58018`)
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
Expand Down
20 changes: 13 additions & 7 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1374,16 +1374,19 @@ def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
return attrs

@final
def _get_level_names(self) -> Hashable | Sequence[Hashable]:
def _get_level_names(self) -> range | Sequence[Hashable]:
"""
Return a name or list of names with None replaced by the level number.
"""
if self._is_multi:
return [
level if name is None else name for level, name in enumerate(self.names)
]
return maybe_sequence_to_range(
[
level if name is None else name
for level, name in enumerate(self.names)
]
)
else:
return 0 if self.name is None else self.name
return range(1) if self.name is None else [self.name]

@final
def _mpl_repr(self) -> np.ndarray:
Expand Down Expand Up @@ -1630,8 +1633,11 @@ def to_frame(
from pandas import DataFrame

if name is lib.no_default:
name = self._get_level_names()
result = DataFrame({name: self}, copy=False)
result_name = self._get_level_names()
else:
result_name = Index([name]) # type: ignore[assignment]
result = DataFrame(self, copy=False)
result.columns = result_name

if index:
result.index = self
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/multi/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from pandas import (
DataFrame,
MultiIndex,
RangeIndex,
)
import pandas._testing as tm

Expand Down Expand Up @@ -148,6 +149,13 @@ def test_to_frame_duplicate_labels():
tm.assert_frame_equal(result, expected)


def test_to_frame_column_rangeindex():
mi = MultiIndex.from_arrays([[1, 2], ["a", "b"]])
result = mi.to_frame().columns
expected = RangeIndex(2)
tm.assert_index_equal(result, expected, exact=True)


def test_to_flat_index(idx):
expected = pd.Index(
(
Expand Down
14 changes: 14 additions & 0 deletions pandas/tests/indexes/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,3 +508,17 @@ def test_compare_read_only_array():
idx = pd.Index(arr)
result = idx > 69
assert result.dtype == bool


def test_to_frame_column_rangeindex():
idx = pd.Index([1])
result = idx.to_frame().columns
expected = RangeIndex(1)
tm.assert_index_equal(result, expected, exact=True)


def test_to_frame_name_tuple_multiindex():
    """Check that a tuple ``name`` passed to ``Index.to_frame`` gives MultiIndex columns."""
    idx = pd.Index([1])
    result = idx.to_frame(name=(1, 2))
    # The tuple (1, 2) is expected to become a single column whose label
    # spans two column levels.
    columns = MultiIndex.from_arrays([[1], [2]])
    expected = pd.DataFrame([1], columns=columns, index=idx)
    tm.assert_frame_equal(result, expected)

0 comments on commit da80247

Please sign in to comment.