From aac172cefd468f3896ea9019b92fdc00efef6ef4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 11:59:22 -0600 Subject: [PATCH] [WIP]: API: Change default for Index.union sort Closes https://github.com/pandas-dev/pandas/issues/24959 --- pandas/core/indexes/base.py | 37 +++++++++++++++++++++---- pandas/tests/indexes/test_base.py | 46 +++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 767da81c5c43a..41430249055f9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self - def union(self, other, sort=True): + def union(self, other, sort=None): """ Form the union of two Index objects. Parameters ---------- other : Index or array-like - sort : bool, default True - Sort the resulting index if possible + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * True : sort the result. A TypeError is raised when the + values cannot be compared. + * False : do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.0 + + Changed the default `sort` to None, matching the + behavior of pandas 0.23.4 and earlier. + Returns ------- union : Index @@ -2273,10 +2289,16 @@ def union(self, other, sort=True): other = ensure_index(other) if len(other) == 0 or self.equals(other): - return self._get_reconciled_name_object(other) + result = self._get_reconciled_name_object(other) + if sort: + result = result.sort_values() + return result if len(self) == 0: - return other._get_reconciled_name_object(self) + result = other._get_reconciled_name_object(self) + if sort: + result = result.sort_values() + return result # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) @@ -2319,13 +2341,16 @@ def union(self, other, sort=True): else: result = lvals - if sort: + if sort is None: try: result = sorting.safe_sort(result) except TypeError as e: warnings.warn("{}, sort order is undefined for " "incomparable objects".format(e), RuntimeWarning, stacklevel=3) + elif sort: + # raise if not sortable. + result = sorting.safe_sort(result) # for subclasses return self._wrap_setop_result(other, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f3e9d835c7391..c38e956dafcac 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -794,6 +794,52 @@ def test_union(self, sort): tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) + def test_union_sort_other_equal(self): + a = pd.Index([1, 0, 2]) + # default, sort=None + result = a.union(a) + tm.assert_index_equal(result, a) + + # sort=True + result = a.union(a, sort=True) + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + # sort=False + result = a.union(a, sort=False) + tm.assert_index_equal(result, a) + + def test_union_sort_other_empty(self): + a = pd.Index([1, 0, 2]) + # default, sort=None + tm.assert_index_equal(a.union(a[:0]), a) + tm.assert_index_equal(a[:0].union(a), a) + + # sort=True + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(a.union(a[:0], sort=True), expected) + tm.assert_index_equal(a[:0].union(a, sort=True), expected) + + # sort=False + tm.assert_index_equal(a.union(a[:0], sort=False), a) + tm.assert_index_equal(a[:0].union(a, sort=False), a) + + def test_union_sort_other_incomparable(self): + a = pd.Index([1, pd.Timestamp('2000')]) + # default, sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = a.union(a[:1]) + + tm.assert_index_equal(result, a) + + # sort=True + with pytest.raises(TypeError, match='.*'): + a.union(a[:1], sort=True) + + # sort=False + result = a.union(a[:1], sort=False) + tm.assert_index_equal(result, a) + @pytest.mark.parametrize("klass", [ np.array, Series, list]) @pytest.mark.parametrize("sort", [True, False])