Skip to content

Commit

Permalink
Improve representation of MultiIndex (#6992)
Browse files Browse the repository at this point in the history
Fixes: #6936 

This PR changes `MultiIndex.__repr__` so that its output is more readable and easier to understand, similar to that of a pandas MultiIndex. The changes also include handling of `<NA>` and `nan` values and fixing the spacing issues around them.

Authors:
  - galipremsagar <sagarprem75@gmail.com>

Approvers:
  - null
  - Keith Kraus

URL: #6992
  • Loading branch information
galipremsagar authored Dec 21, 2020
1 parent 923cf49 commit 7556e23
Show file tree
Hide file tree
Showing 6 changed files with 513 additions and 298 deletions.
24 changes: 6 additions & 18 deletions python/cudf/cudf/core/column/string.py
Original file line number Diff line number Diff line change
Expand Up @@ -2305,15 +2305,9 @@ def partition(self, sep=" ", expand=True):
Which will create a MultiIndex:
>>> idx.str.partition()
MultiIndex(levels=[0 X
1 Y
dtype: object, 0
dtype: object, 0 123
1 999
dtype: object],
codes= 0 1 2
0 0 0 0
1 1 0 1)
MultiIndex([('X', ' ', '123'),
('Y', ' ', '999')],
)
"""
if expand is not True:
raise NotImplementedError(
Expand Down Expand Up @@ -2375,15 +2369,9 @@ def rpartition(self, sep=" ", expand=True):
Which will create a MultiIndex:
>>> idx.str.rpartition()
MultiIndex(levels=[0 X
1 Y
dtype: object, 0
dtype: object, 0 123
1 999
dtype: object],
codes= 0 1 2
0 0 0 0
1 1 0 1)
MultiIndex([('X', ' ', '123'),
('Y', ' ', '999')],
)
"""
if expand is not True:
raise NotImplementedError(
Expand Down
22 changes: 7 additions & 15 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7248,23 +7248,15 @@ def from_pandas(obj, nan_as_null=None):
(3, 2),
(4, 2),
(5, 1)],
names=['x', 'y'])
names=['x', 'y'])
>>> gmidx = cudf.from_pandas(pmidx)
>>> gmidx
MultiIndex(levels=[0 1
1 3
2 4
3 5
dtype: int64, 0 1
1 2
2 5
dtype: int64],
codes= x y
0 0 0
1 0 2
2 1 1
3 2 1
4 3 0)
MultiIndex([(1, 1),
(1, 5),
(3, 2),
(4, 2),
(5, 1)],
names=['x', 'y'])
>>> type(gmidx)
<class 'cudf.core.multiindex.MultiIndex'>
>>> type(pmidx)
Expand Down
20 changes: 6 additions & 14 deletions python/cudf/cudf/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,20 +119,12 @@ def size(self):
... names=["x", "y"],
... )
>>> midx
MultiIndex(levels=[0 a
1 b
2 c
3 None
dtype: object, 0 1
1 None
2 5
dtype: object],
codes= x y
0 0 0
1 0 2
2 1 1
3 2 1
4 3 0)
MultiIndex([( 'a', '1'),
( 'a', '5'),
( 'b', <NA>),
( 'c', <NA>),
(<NA>, '1')],
names=['x', 'y'])
>>> midx.size
5
"""
Expand Down
158 changes: 52 additions & 106 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,9 @@ def __init__(
UInt64Index([1, 2, 3], dtype='uint64', name='a')
>>> cudf.Index(cudf.DataFrame({"a":[1, 2], "b":[2, 3]}))
MultiIndex(levels=[0 1
1 2
dtype: int64, 0 2
1 3
dtype: int64],
codes= a b
0 0 0
1 1 1)
MultiIndex([(1, 2),
(2, 3)],
names=['a', 'b'])
"""
pass

Expand Down Expand Up @@ -407,31 +402,17 @@ def dropna(self, how="any"):
... names=["x", "y"],
... )
>>> midx
MultiIndex(levels=[0 1
1 null
2 4
3 null
dtype: int64, 0 1
1 2
2 5
dtype: int64],
codes= x y
0 0 0
1 0 2
2 1 1
3 2 1
4 3 0)
MultiIndex([( 1, 1),
( 1, 5),
(<NA>, 2),
( 4, 2),
(<NA>, 1)],
names=['x', 'y'])
>>> midx.dropna()
MultiIndex(levels=[0 1
1 4
dtype: int64, 0 1
1 2
2 5
dtype: int64],
codes= x y
0 0 0
1 0 2
2 1 1)
MultiIndex([(1, 1),
(1, 5),
(4, 2)],
names=['x', 'y'])
"""
return super().dropna(how=how)

Expand Down Expand Up @@ -516,16 +497,11 @@ def set_names(self, names, level=None, inplace=False):
>>> idx = cudf.MultiIndex.from_product([['python', 'cobra'],
... [2018, 2019]])
>>> idx
MultiIndex(levels=[0 cobra
1 python
dtype: object, 0 2018
1 2019
dtype: int64],
codes= 0 1
0 1 0
1 1 1
2 0 0
3 0 1)
MultiIndex([('python', 2018),
('python', 2019),
( 'cobra', 2018),
( 'cobra', 2019)],
)
>>> idx.names
FrozenList([None, None])
>>> idx.set_names(['kind', 'year'], inplace=True)
Expand Down Expand Up @@ -622,20 +598,12 @@ def argsort(self, ascending=True, **kwargs):
... names=["x", "y"],
... )
>>> index
MultiIndex(levels=[0 1
1 3
2 4
3 -10
dtype: int64, 0 1
1 11
2 5
dtype: int64],
codes= x y
0 0 0
1 0 2
2 1 1
3 2 1
4 3 0)
MultiIndex([( 1, 1),
( 1, 5),
( 3, 11),
( 4, 11),
(-10, 1)],
names=['x', 'y'])
>>> index.argsort()
array([4, 0, 1, 2, 3], dtype=int32)
>>> index.argsort(ascending=False)
Expand Down Expand Up @@ -980,50 +948,26 @@ def sort_values(self, return_indexer=False, ascending=True, key=None):
... names=["x", "y"],
... )
>>> midx
MultiIndex(levels=[0 1
1 3
2 4
3 -10
dtype: int64, 0 1
1 11
2 5
dtype: int64],
codes= x y
0 0 0
1 0 2
2 1 1
3 2 1
4 3 0)
MultiIndex([( 1, 1),
( 1, 5),
( 3, 11),
( 4, 11),
(-10, 1)],
names=['x', 'y'])
>>> midx.sort_values()
MultiIndex(levels=[0 1
1 3
2 4
3 -10
dtype: int64, 0 1
1 11
2 5
dtype: int64],
codes= x y
4 3 0
0 0 0
1 0 2
2 1 1
3 2 1)
MultiIndex([(-10, 1),
( 1, 1),
( 1, 5),
( 3, 11),
( 4, 11)],
names=['x', 'y'])
>>> midx.sort_values(ascending=False)
MultiIndex(levels=[0 1
1 3
2 4
3 -10
dtype: int64, 0 1
1 11
2 5
dtype: int64],
codes= x y
3 2 1
2 1 1
1 0 2
0 0 0
4 3 0)
MultiIndex([( 4, 11),
( 3, 11),
( 1, 5),
( 1, 1),
(-10, 1)],
names=['x', 'y'])
"""
if key is not None:
raise NotImplementedError("key parameter is not yet implemented.")
Expand Down Expand Up @@ -1138,16 +1082,18 @@ def join(
>>> lhs = cudf.DataFrame(
... {"a":[2, 3, 1], "b":[3, 4, 2]}).set_index(['a', 'b']
... ).index
>>> lhs
MultiIndex([(2, 3),
(3, 4),
(1, 2)],
names=['a', 'b'])
>>> rhs = cudf.DataFrame({"a":[1, 4, 3]}).set_index('a').index
>>> rhs
Int64Index([1, 4, 3], dtype='int64', name='a')
>>> lhs.join(rhs, how='inner')
MultiIndex(levels=[0 1
1 3
dtype: int64, 0 2
1 4
dtype: int64],
codes= a b
0 1 1
1 0 0)
MultiIndex([(3, 4),
(1, 2)],
names=['a', 'b'])
"""

if isinstance(self, cudf.MultiIndex) and isinstance(
Expand Down
Loading

0 comments on commit 7556e23

Please sign in to comment.