Skip to content

Commit

Permalink
ENH: Add sort parameter to RangeIndex.union (#24471) (#25788)
Browse files Browse the repository at this point in the history
  • Loading branch information
reidy-p authored and jreback committed Mar 26, 2019
1 parent d404460 commit af6ccf6
Show file tree
Hide file tree
Showing 6 changed files with 133 additions and 78 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Other Enhancements
- ``Series.str`` has gained :meth:`Series.str.casefold` method to remove all case distinctions present in a string (:issue:`25405`)
- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`)
- :meth:`DatetimeIndex.union` now supports the ``sort`` argument. The behaviour of the sort parameter matches that of :meth:`Index.union` (:issue:`24994`)
- :meth:`RangeIndex.union` now supports the ``sort`` argument. If ``sort=False``, an unsorted ``Int64Index`` is always returned. ``sort=None`` is the default and returns a monotonically increasing ``RangeIndex`` if possible or a sorted ``Int64Index`` if not (:issue:`24471`)
- :meth:`DataFrame.rename` now supports the ``errors`` argument to raise errors when attempting to rename nonexistent keys (:issue:`13473`)
- :class:`RangeIndex` has gained :attr:`~RangeIndex.start`, :attr:`~RangeIndex.stop`, and :attr:`~RangeIndex.step` attributes (:issue:`25710`)
- :class:`datetime.timezone` objects are now supported as arguments to timezone methods and constructors (:issue:`25065`)
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2320,7 +2320,7 @@ def union(self, other, sort=None):
else:
rvals = other._values

if self.is_monotonic and other.is_monotonic:
if sort is None and self.is_monotonic and other.is_monotonic:
try:
result = self._outer_indexer(lvals, rvals)[0]
except TypeError:
Expand Down
16 changes: 12 additions & 4 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,23 +463,31 @@ def _extended_gcd(self, a, b):
old_t, t = t, old_t - quotient * t
return old_r, old_s, old_t

def union(self, other):
def union(self, other, sort=None):
"""
Form the union of two Index objects and sort if possible
Parameters
----------
other : Index or array-like
sort : False or None, default None
Whether to sort resulting index. ``sort=None`` returns a
monotonically increasing ``RangeIndex`` if possible or a sorted
``Int64Index`` if not. ``sort=False`` always returns an
unsorted ``Int64Index``
.. versionadded:: 0.25.0
Returns
-------
union : Index
"""
self._assert_can_do_setop(other)
if len(other) == 0 or self.equals(other) or len(self) == 0:
return super(RangeIndex, self).union(other)
return super(RangeIndex, self).union(other, sort=sort)

if isinstance(other, RangeIndex):
if isinstance(other, RangeIndex) and sort is None:
start_s, step_s = self._start, self._step
end_s = self._start + self._step * (len(self) - 1)
start_o, step_o = other._start, other._step
Expand Down Expand Up @@ -516,7 +524,7 @@ def union(self, other):
(end_s - step_o <= end_o)):
return RangeIndex(start_r, end_r + step_o, step_o)

return self._int64index.union(other)
return self._int64index.union(other, sort=sort)

@Appender(_index_shared_docs['join'])
def join(self, other, how='left', level=None, return_indexers=False,
Expand Down
12 changes: 10 additions & 2 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,11 @@ def test_union_bug_1730(self, sort):
rng_b = date_range('1/1/2012', periods=4, freq='4H')

result = rng_a.union(rng_b, sort=sort)
exp = DatetimeIndex(sorted(set(list(rng_a)) | set(list(rng_b))))
exp = list(rng_a) + list(rng_b[1:])
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize("sort", [None, False])
Expand All @@ -112,7 +116,11 @@ def test_union_bug_4564(self, sort):
right = left + DateOffset(minutes=15)

result = left.union(right, sort=sort)
exp = DatetimeIndex(sorted(set(list(left)) | set(list(right))))
exp = list(left) + list(right)
if sort is None:
exp = DatetimeIndex(sorted(exp))
else:
exp = DatetimeIndex(exp)
tm.assert_index_equal(result, exp)

@pytest.mark.parametrize("sort", [None, False])
Expand Down
11 changes: 9 additions & 2 deletions pandas/tests/indexes/period/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,12 @@ def test_union(self, sort):
# union
other1 = pd.period_range('1/1/2000', freq='D', periods=5)
rng1 = pd.period_range('1/6/2000', freq='D', periods=5)
expected1 = pd.period_range('1/1/2000', freq='D', periods=10)
expected1 = pd.PeriodIndex(['2000-01-06', '2000-01-07',
'2000-01-08', '2000-01-09',
'2000-01-10', '2000-01-01',
'2000-01-02', '2000-01-03',
'2000-01-04', '2000-01-05'],
freq='D')

rng2 = pd.period_range('1/1/2000', freq='D', periods=5)
other2 = pd.period_range('1/4/2000', freq='D', periods=5)
Expand Down Expand Up @@ -77,7 +82,9 @@ def test_union(self, sort):

rng7 = pd.period_range('2003-01-01', freq='A', periods=5)
other7 = pd.period_range('1998-01-01', freq='A', periods=8)
expected7 = pd.period_range('1998-01-01', freq='A', periods=10)
expected7 = pd.PeriodIndex(['2003', '2004', '2005', '2006', '2007',
'1998', '1999', '2000', '2001', '2002'],
freq='A')

rng8 = pd.PeriodIndex(['1/3/2000', '1/2/2000', '1/1/2000',
'1/5/2000', '1/4/2000'], freq='D')
Expand Down
169 changes: 100 additions & 69 deletions pandas/tests/indexes/test_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,12 @@

from .test_numeric import Numeric

# aliases to make some tests easier to read
RI = RangeIndex
I64 = Int64Index
F64 = Float64Index
OI = Index


class TestRangeIndex(Numeric):
_holder = RangeIndex
Expand Down Expand Up @@ -565,51 +571,73 @@ def test_intersection(self, sort):
expected = RangeIndex(0, 0, 1)
tm.assert_index_equal(result, expected)

def test_union_noncomparable(self):
@pytest.mark.parametrize('sort', [False, None])
def test_union_noncomparable(self, sort):
from datetime import datetime, timedelta
# corner case, non-Int64Index
now = datetime.now()
other = Index([now + timedelta(i) for i in range(4)], dtype=object)
result = self.index.union(other)
result = self.index.union(other, sort=sort)
expected = Index(np.concatenate((self.index, other)))
tm.assert_index_equal(result, expected)

result = other.union(self.index)
result = other.union(self.index, sort=sort)
expected = Index(np.concatenate((other, self.index)))
tm.assert_index_equal(result, expected)

def test_union(self):
RI = RangeIndex
I64 = Int64Index
cases = [(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1)),
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1)),
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1)),
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1)),
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1)),
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2)),
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1)),
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5)),
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5)),
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1)),
(RI(0), RI(0), RI(0)),
(RI(0, -10, -2), RI(0), RI(0, -10, -2)),
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2)),
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2)),
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1)),
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5)),
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5)),
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4])),
(RI(0, 10, 1), I64([]), RI(0, 10, 1)),
(RI(0), I64([1, 5, 6]), I64([1, 5, 6]))]
for idx1, idx2, expected in cases:
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)
res3 = idx1._int64index.union(idx2)
tm.assert_index_equal(res1, expected, exact=True)
tm.assert_index_equal(res2, expected, exact=True)
tm.assert_index_equal(res3, expected)
@pytest.fixture(params=[
(RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0, 10, 1), RI(5, 20, 1), RI(0, 20, 1), I64(range(20))),
(RI(0, 10, 1), RI(10, 20, 1), RI(0, 20, 1), I64(range(20))),
(RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1), RI(0, -10, -1)),
(RI(0, -10, -1), RI(-10, -20, -1), RI(-19, 1, 1),
I64(range(0, -20, -1))),
(RI(0, 10, 2), RI(1, 10, 2), RI(0, 10, 1),
I64(list(range(0, 10, 2)) + list(range(1, 10, 2)))),
(RI(0, 11, 2), RI(1, 12, 2), RI(0, 12, 1),
I64(list(range(0, 11, 2)) + list(range(1, 12, 2)))),
(RI(0, 21, 4), RI(-2, 24, 4), RI(-2, 24, 2),
I64(list(range(0, 21, 4)) + list(range(-2, 24, 4)))),
(RI(0, -20, -2), RI(-1, -21, -2), RI(-19, 1, 1),
I64(list(range(0, -20, -2)) + list(range(-1, -21, -2)))),
(RI(0, 100, 5), RI(0, 100, 20), RI(0, 100, 5), I64(range(0, 100, 5))),
(RI(0, -100, -5), RI(5, -100, -20), RI(-95, 10, 5),
I64(list(range(0, -100, -5)) + [5])),
(RI(0, -11, -1), RI(1, -12, -4), RI(-11, 2, 1),
I64(list(range(0, -11, -1)) + [1, -11])),
(RI(0), RI(0), RI(0), RI(0)),
(RI(0, -10, -2), RI(0), RI(0, -10, -2), RI(0, -10, -2)),
(RI(0, 100, 2), RI(100, 150, 200), RI(0, 102, 2),
I64(range(0, 102, 2))),
(RI(0, -100, -2), RI(-100, 50, 102), RI(-100, 4, 2),
I64(list(range(0, -100, -2)) + [-100, 2])),
(RI(0, -100, -1), RI(0, -50, -3), RI(-99, 1, 1),
I64(list(range(0, -100, -1)))),
(RI(0, 1, 1), RI(5, 6, 10), RI(0, 6, 5), I64([0, 5])),
(RI(0, 10, 5), RI(-5, -6, -20), RI(-5, 10, 5), I64([0, 5, -5])),
(RI(0, 3, 1), RI(4, 5, 1), I64([0, 1, 2, 4]), I64([0, 1, 2, 4])),
(RI(0, 10, 1), I64([]), RI(0, 10, 1), RI(0, 10, 1)),
(RI(0), I64([1, 5, 6]), I64([1, 5, 6]), I64([1, 5, 6]))
])
def unions(self, request):
"""Inputs and expected outputs for RangeIndex.union tests"""

return request.param

def test_union_sorted(self, unions):

idx1, idx2, expected_sorted, expected_notsorted = unions

res1 = idx1.union(idx2, sort=None)
tm.assert_index_equal(res1, expected_sorted, exact=True)

res1 = idx1.union(idx2, sort=False)
tm.assert_index_equal(res1, expected_notsorted, exact=True)

res2 = idx2.union(idx1, sort=None)
res3 = idx1._int64index.union(idx2, sort=None)
tm.assert_index_equal(res2, expected_sorted, exact=True)
tm.assert_index_equal(res3, expected_sorted)

def test_nbytes(self):

Expand Down Expand Up @@ -840,38 +868,41 @@ def test_len_specialised(self):
i = RangeIndex(0, 5, step)
assert len(i) == 0

def test_append(self):
@pytest.fixture(params=[
([RI(1, 12, 5)], RI(1, 12, 5)),
([RI(0, 6, 4)], RI(0, 6, 4)),
([RI(1, 3), RI(3, 7)], RI(1, 7)),
([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
([RI(-4, -8), RI(3, -4)], RI(0, 0)),
([RI(-4, -8), RI(3, 5)], RI(3, 5)),
([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
([RI(-2,), RI(3, 5)], RI(3, 5)),
([RI(2,), RI(2)], I64([0, 1, 0, 1])),
([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
])
def appends(self, request):
"""Inputs and expected outputs for RangeIndex.append test"""

return request.param

def test_append(self, appends):
# GH16212
RI = RangeIndex
I64 = Int64Index
F64 = Float64Index
OI = Index
cases = [([RI(1, 12, 5)], RI(1, 12, 5)),
([RI(0, 6, 4)], RI(0, 6, 4)),
([RI(1, 3), RI(3, 7)], RI(1, 7)),
([RI(1, 5, 2), RI(5, 6)], RI(1, 6, 2)),
([RI(1, 3, 2), RI(4, 7, 3)], RI(1, 7, 3)),
([RI(-4, 3, 2), RI(4, 7, 2)], RI(-4, 7, 2)),
([RI(-4, -8), RI(-8, -12)], RI(0, 0)),
([RI(-4, -8), RI(3, -4)], RI(0, 0)),
([RI(-4, -8), RI(3, 5)], RI(3, 5)),
([RI(-4, -2), RI(3, 5)], I64([-4, -3, 3, 4])),
([RI(-2,), RI(3, 5)], RI(3, 5)),
([RI(2,), RI(2)], I64([0, 1, 0, 1])),
([RI(2,), RI(2, 5), RI(5, 8, 4)], RI(0, 6)),
([RI(2,), RI(3, 5), RI(5, 8, 4)], I64([0, 1, 3, 4, 5])),
([RI(-2, 2), RI(2, 5), RI(5, 8, 4)], RI(-2, 6)),
([RI(3,), I64([-1, 3, 15])], I64([0, 1, 2, -1, 3, 15])),
([RI(3,), F64([-1, 3.1, 15.])], F64([0, 1, 2, -1, 3.1, 15.])),
([RI(3,), OI(['a', None, 14])], OI([0, 1, 2, 'a', None, 14])),
([RI(3, 1), OI(['a', None, 14])], OI(['a', None, 14]))
]

for indices, expected in cases:
result = indices[0].append(indices[1:])
tm.assert_index_equal(result, expected, exact=True)

if len(indices) == 2:
# Append single item rather than list
result2 = indices[0].append(indices[1])
tm.assert_index_equal(result2, expected, exact=True)

indices, expected = appends

result = indices[0].append(indices[1:])
tm.assert_index_equal(result, expected, exact=True)

if len(indices) == 2:
# Append single item rather than list
result2 = indices[0].append(indices[1])
tm.assert_index_equal(result2, expected, exact=True)

0 comments on commit af6ccf6

Please sign in to comment.