From 151c425ea71a18f4ba8f6dfe1f6278dc0bd8dc23 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 20 Oct 2020 14:26:32 +0300 Subject: [PATCH 01/71] BUG: stabilize sorting in Series.sort_values --- pandas/core/series.py | 47 ++---------------------------------------- pandas/core/sorting.py | 11 +++++++++- 2 files changed, 12 insertions(+), 46 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 7ca2d76905e28..087d3ca54b9f0 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -92,7 +92,7 @@ from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager from pandas.core.shared_docs import _shared_docs -from pandas.core.sorting import ensure_key_mapped +from pandas.core.sorting import ensure_key_mapped, nargsort from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -3274,52 +3274,9 @@ def sort_values( "sort in-place you must create a copy" ) - def _try_kind_sort(arr): - arr = ensure_key_mapped(arr, key) - arr = getattr(arr, "_values", arr) - - # easier to ask forgiveness than permission - try: - # if kind==mergesort, it can fail for object dtype - return arr.argsort(kind=kind) - except TypeError: - # stable sort not available for object dtype - # uses the argsort default quicksort - return arr.argsort(kind="quicksort") - arr = self._values - sorted_index = np.empty(len(self), dtype=np.int32) - - bad = isna(arr) - good = ~bad - idx = ibase.default_index(len(self)) - - argsorted = _try_kind_sort(self[good]) - - if is_list_like(ascending): - if len(ascending) != 1: - raise ValueError( - f"Length of ascending ({len(ascending)}) must be 1 for Series" - ) - ascending = ascending[0] - - if not is_bool(ascending): - raise ValueError("ascending must be boolean") - - if not ascending: - argsorted = argsorted[::-1] - - if na_position == "last": - n = good.sum() - sorted_index[:n] = idx[good][argsorted] - sorted_index[n:] = idx[bad] - elif 
na_position == "first": - n = bad.sum() - sorted_index[n:] = idx[good][argsorted] - sorted_index[:n] = idx[bad] - else: - raise ValueError(f"invalid na_position: {na_position}") + sorted_index = nargsort(arr, kind, ascending, na_position, key) result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 1132234ae7f8d..49b4aa91b2b36 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -374,7 +374,16 @@ def nargsort( if not ascending: non_nans = non_nans[::-1] non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] + + # GH 35922. Move support for object sort here from Series.sort_values + try: + # if kind==mergesort, it can fail for object dtype + indexer = non_nan_idx[non_nans.argsort(kind=kind)] + except TypeError: + # stable sort not available for object dtype + # uses the argsort default quicksort + indexer = non_nan_idx[non_nans.argsort(kind="quicksort")] + if not ascending: indexer = indexer[::-1] # Finally, place the NaNs at the end or the beginning according to From 733225512431ed61aefd6e67ec0fe6f290615f3b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 20 Oct 2020 14:36:57 +0300 Subject: [PATCH 02/71] DOC: add comment to nargsort call in Series.sort_values --- pandas/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 087d3ca54b9f0..8ada6437eccd6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3276,6 +3276,7 @@ def sort_values( arr = self._values + # GH 35922. 
Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position, key) result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) From 546b9fae43dceb2f586e22ef9b06c513fc574b61 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 12:16:55 +0300 Subject: [PATCH 03/71] use nargsort with indices: Period, DateTime, TimeDelta --- pandas/core/indexes/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index f336eec8c4cce..d780ebc716292 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4510,9 +4510,7 @@ def sort_values( # GH 35584. Sort missing values according to na_position kwarg # ignore na_position for MultiIndex - if not isinstance( - self, (ABCMultiIndex, ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex) - ): + if not isinstance(self, ABCMultiIndex): _as = nargsort( items=idx, ascending=ascending, na_position=na_position, key=key ) From 9b51d42f25b06cf06953c8be8235d55fc18aa4b5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 15:24:09 +0300 Subject: [PATCH 04/71] mv NaNs to the end of dupe lists in value_counts --- pandas/core/algorithms.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9a3144d1ccbaa..e2f243d376d3a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -822,9 +822,19 @@ def value_counts_arraylike(values, dropna: bool): mask = isna(values) if not dropna and mask.any(): + # GH 35922. 
Series.sort_values is stable now, so need to + # append NaN counts or move to the end to make sure they are + # sorted toward the end when calling value_counts if not isna(keys).any(): - keys = np.insert(keys, 0, np.NaN) - counts = np.insert(counts, 0, mask.sum()) + keys = np.append(keys, np.NaN) + counts = np.append(counts, mask.sum()) + else: + nan_pos = np.where(np.isnan(keys)) + keys[nan_pos] = keys[-1] + keys[-1] = np.NaN + tmp = counts[nan_pos] + counts[nan_pos] = counts[-1] + counts[-1] = tmp keys = _reconstruct_data(keys, original.dtype, original) From 7805d1e66c6d476aa70ab0eb65a3abcd53a7edbf Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 15:54:20 +0300 Subject: [PATCH 05/71] CLN: remove extra comment indents --- pandas/core/algorithms.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index e2f243d376d3a..caba3245c773a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -822,9 +822,9 @@ def value_counts_arraylike(values, dropna: bool): mask = isna(values) if not dropna and mask.any(): - # GH 35922. Series.sort_values is stable now, so need to - # append NaN counts or move to the end to make sure they are - # sorted toward the end when calling value_counts + # GH 35922. 
Series.sort_values is stable now, so need to + # append NaN counts or move to the end to make sure they are + # sorted toward the end when calling value_counts if not isna(keys).any(): keys = np.append(keys, np.NaN) counts = np.append(counts, mask.sum()) From 2b75f789d18840ec50d33eb83dae963e7f303c8c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 16:59:54 +0300 Subject: [PATCH 06/71] attempt to mimic previous count_values behavior by reversing before sort --- pandas/core/algorithms.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index caba3245c773a..98309f6e2b891 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -819,22 +819,19 @@ def value_counts_arraylike(values, dropna: bool): # TODO: handle uint8 f = getattr(htable, f"value_count_{ndtype}") keys, counts = f(values, dropna) + # GH 35922. Mimic previous value_counts behavior now that + # Series.sort_values is stable + keys = keys[::-1] + counts = counts[::-1] mask = isna(values) if not dropna and mask.any(): # GH 35922. 
Series.sort_values is stable now, so need to - # append NaN counts or move to the end to make sure they are + # append NaN counts to make sure they are # sorted toward the end when calling value_counts if not isna(keys).any(): keys = np.append(keys, np.NaN) counts = np.append(counts, mask.sum()) - else: - nan_pos = np.where(np.isnan(keys)) - keys[nan_pos] = keys[-1] - keys[-1] = np.NaN - tmp = counts[nan_pos] - counts[nan_pos] = counts[-1] - counts[-1] = tmp keys = _reconstruct_data(keys, original.dtype, original) From 2844a971dcbe21b071e27ebf086a129d32650463 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 17:08:34 +0300 Subject: [PATCH 07/71] CLN: clean-up unnecessary import --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 8ada6437eccd6..f4c76b23911d6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -92,7 +92,7 @@ from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager from pandas.core.shared_docs import _shared_docs -from pandas.core.sorting import ensure_key_mapped, nargsort +from pandas.core.sorting import nargsort from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime From 965a5473be415424066610d9de1c0b13d1c6748b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 17:35:08 +0300 Subject: [PATCH 08/71] Revert "attempt to mimic previous count_values behavior by reversing before sort" This reverts commit 2b75f789d18840ec50d33eb83dae963e7f303c8c. 
--- pandas/core/algorithms.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 98309f6e2b891..caba3245c773a 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -819,19 +819,22 @@ def value_counts_arraylike(values, dropna: bool): # TODO: handle uint8 f = getattr(htable, f"value_count_{ndtype}") keys, counts = f(values, dropna) - # GH 35922. Mimic previous value_counts behavior now that - # Series.sort_values is stable - keys = keys[::-1] - counts = counts[::-1] mask = isna(values) if not dropna and mask.any(): # GH 35922. Series.sort_values is stable now, so need to - # append NaN counts to make sure they are + # append NaN counts or move to the end to make sure they are # sorted toward the end when calling value_counts if not isna(keys).any(): keys = np.append(keys, np.NaN) counts = np.append(counts, mask.sum()) + else: + nan_pos = np.where(np.isnan(keys)) + keys[nan_pos] = keys[-1] + keys[-1] = np.NaN + tmp = counts[nan_pos] + counts[nan_pos] = counts[-1] + counts[-1] = tmp keys = _reconstruct_data(keys, original.dtype, original) From 29d47ee950534aaf6ff1e85f611b4842825813c4 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 17:48:20 +0300 Subject: [PATCH 09/71] TST: alter tests in test_algos --- pandas/tests/test_algos.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 3a1279c481a1d..16142de6258f9 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1166,12 +1166,12 @@ def test_value_counts_normalized(self): s_typed = s.astype(t) result = s_typed.value_counts(normalize=True, dropna=False) expected = Series( - [0.6, 0.2, 0.2], index=Series([np.nan, 2.0, 1.0], dtype=t) + [0.6, 0.2, 0.2], index=Series([np.nan, 1.0, 2.0], dtype=t) ) tm.assert_series_equal(result, expected) result = s_typed.value_counts(normalize=True, 
dropna=True) - expected = Series([0.5, 0.5], index=Series([2.0, 1.0], dtype=t)) + expected = Series([0.5, 0.5], index=Series([1.0, 2.0], dtype=t)) tm.assert_series_equal(result, expected) def test_value_counts_uint64(self): @@ -1182,7 +1182,7 @@ def test_value_counts_uint64(self): tm.assert_series_equal(result, expected) arr = np.array([-1, 2 ** 63], dtype=object) - expected = Series([1, 1], index=[-1, 2 ** 63]) + expected = Series([1, 1], index=[2 ** 63, -1]) result = algos.value_counts(arr) # 32-bit linux has a different ordering From 151196d4dcb14d57e8fb49e8514565fcfd81fb31 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 17:58:05 +0300 Subject: [PATCH 10/71] TST: alter value_counts dupe order in boolean/test_function --- pandas/tests/arrays/boolean/test_function.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/boolean/test_function.py b/pandas/tests/arrays/boolean/test_function.py index 1547f08fa66b0..7665c350e3443 100644 --- a/pandas/tests/arrays/boolean/test_function.py +++ b/pandas/tests/arrays/boolean/test_function.py @@ -77,11 +77,11 @@ def test_ufunc_reduce_raises(values): def test_value_counts_na(): arr = pd.array([True, False, pd.NA], dtype="boolean") result = arr.value_counts(dropna=False) - expected = pd.Series([1, 1, 1], index=[True, False, pd.NA], dtype="Int64") + expected = pd.Series([1, 1, 1], index=[False, True, pd.NA], dtype="Int64") tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) - expected = pd.Series([1, 1], index=[True, False], dtype="Int64") + expected = pd.Series([1, 1], index=[False, True], dtype="Int64") tm.assert_series_equal(result, expected) From d71bfc8e35e8e2be923982bd35f54c669a0bbfa6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 18:19:06 +0300 Subject: [PATCH 11/71] mv NaT to end of dupe sort order, alter test_value_counts NaT is expected to be last in the list of duplicate value counts guarantee that by finding
it and moving to the end of the array (consider giving it up: the code ends up cluttered) Also alter test_value_counts to make sure that expectations match new sort_values stability --- pandas/core/algorithms.py | 11 ++++++++++- pandas/tests/base/test_value_counts.py | 6 +++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index caba3245c773a..a9e0b8109aa4c 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -809,9 +809,18 @@ def value_counts_arraylike(values, dropna: bool): keys, counts = htable.value_count_int64(values, dropna) + msk = keys != iNaT if dropna: - msk = keys != iNaT keys, counts = keys[msk], counts[msk] + # GH 35922. Since sort_values is stable now, move NaT to the end + # to make sure NaT count is sorted toward the end. + if msk.sum() != len(keys): + nat_pos = np.where(~msk) + keys[nat_pos] = keys[-1] + keys[-1] = iNaT + tmp = counts[nat_pos] + counts[nat_pos] = counts[-1] + counts[-1] = tmp else: # ndarray like diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 602133bb4122e..7d783e7a2cd8b 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -153,16 +153,16 @@ def test_value_counts_bins(index_or_series): # these return the same res4 = s1.value_counts(bins=4, dropna=True) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) - exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4, exp4) res4 = s1.value_counts(bins=4, dropna=False) intervals = IntervalIndex.from_breaks([0.997, 1.5, 2.0, 2.5, 3.0]) - exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 3, 1, 2])) + exp4 = Series([2, 1, 1, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) - exp4n = Series([0.5, 0.25, 0.25, 0], 
index=intervals.take([0, 3, 1, 2])) + exp4n = Series([0.5, 0.25, 0.25, 0], index=intervals.take([0, 1, 3, 2])) tm.assert_series_equal(res4n, exp4n) # handle NA's properly From aff28acbdeba21caa59d99064e13f5ef7ec31991 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 21 Oct 2020 18:24:27 +0300 Subject: [PATCH 12/71] REFACT: use tuple unpacking for element swap --- pandas/core/algorithms.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index a9e0b8109aa4c..3c0ec42c4a487 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -816,11 +816,8 @@ def value_counts_arraylike(values, dropna: bool): # to make sure NaT count is sorted toward the end. if msk.sum() != len(keys): nat_pos = np.where(~msk) - keys[nat_pos] = keys[-1] - keys[-1] = iNaT - tmp = counts[nat_pos] - counts[nat_pos] = counts[-1] - counts[-1] = tmp + keys[nat_pos], keys[-1] = keys[-1], keys[nat_pos] + counts[nat_pos], counts[-1] = counts[-1], counts[nat_pos] else: # ndarray like @@ -839,11 +836,8 @@ def value_counts_arraylike(values, dropna: bool): counts = np.append(counts, mask.sum()) else: nan_pos = np.where(np.isnan(keys)) - keys[nan_pos] = keys[-1] - keys[-1] = np.NaN - tmp = counts[nan_pos] - counts[nan_pos] = counts[-1] - counts[-1] = tmp + keys[nan_pos], keys[-1] = keys[-1], keys[nan_pos] + counts[nan_pos], counts[-1] = counts[-1], counts[nan_pos] keys = _reconstruct_data(keys, original.dtype, original) From 0b8aae984d69f5031fd148d6410a45f067f29962 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 09:32:36 +0300 Subject: [PATCH 13/71] DOC: clarify comments in algorithms/value_counts --- pandas/core/algorithms.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 3c0ec42c4a487..c9b7d9196a68b 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -812,8 +812,8 @@ 
def value_counts_arraylike(values, dropna: bool): msk = keys != iNaT if dropna: keys, counts = keys[msk], counts[msk] - # GH 35922. Since sort_values is stable now, move NaT to the end - # to make sure NaT count is sorted toward the end. + # GH 35922. Since Series.sort_values is stable now, move NaT to the end + # to make sure NaT count is the last among duplicate counts. if msk.sum() != len(keys): nat_pos = np.where(~msk) keys[nat_pos], keys[-1] = keys[-1], keys[nat_pos] @@ -828,9 +828,8 @@ def value_counts_arraylike(values, dropna: bool): mask = isna(values) if not dropna and mask.any(): - # GH 35922. Series.sort_values is stable now, so need to - # append NaN counts or move to the end to make sure they are - # sorted toward the end when calling value_counts + # GH 35922. Since Series.sort_values is stable now, move NaT to the end + # to make sure NaT count is the last among duplicate counts. if not isna(keys).any(): keys = np.append(keys, np.NaN) counts = np.append(counts, mask.sum()) From e7cebc4a481dda6128488501e9df53226f8b5245 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 10:21:34 +0300 Subject: [PATCH 14/71] stop forcing NaN-like to be at the end of dupe order --- pandas/core/algorithms.py | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index c9b7d9196a68b..9a3144d1ccbaa 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -809,15 +809,9 @@ def value_counts_arraylike(values, dropna: bool): keys, counts = htable.value_count_int64(values, dropna) - msk = keys != iNaT if dropna: + msk = keys != iNaT keys, counts = keys[msk], counts[msk] - # GH 35922. Since Series.sort_values is stable now, move NaT to the end - # to make sure NaT count is the last among duplicate counts. 
- if msk.sum() != len(keys): - nat_pos = np.where(~msk) - keys[nat_pos], keys[-1] = keys[-1], keys[nat_pos] - counts[nat_pos], counts[-1] = counts[-1], counts[nat_pos] else: # ndarray like @@ -828,15 +822,9 @@ def value_counts_arraylike(values, dropna: bool): mask = isna(values) if not dropna and mask.any(): - # GH 35922. Since Series.sort_values is stable now, move NaT to the end - # to make sure NaT count is the last among duplicate counts. if not isna(keys).any(): - keys = np.append(keys, np.NaN) - counts = np.append(counts, mask.sum()) - else: - nan_pos = np.where(np.isnan(keys)) - keys[nan_pos], keys[-1] = keys[-1], keys[nan_pos] - counts[nan_pos], counts[-1] = counts[-1], counts[nan_pos] + keys = np.insert(keys, 0, np.NaN) + counts = np.insert(counts, 0, mask.sum()) keys = _reconstruct_data(keys, original.dtype, original) From 06931e0a07c9b9fe557a87ca3d0cacaf569485a2 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 10:24:34 +0300 Subject: [PATCH 15/71] TST: NaN-like is now first among duplicates in count_values --- pandas/tests/test_algos.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 16142de6258f9..63ab60fbc4ba7 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1136,7 +1136,7 @@ def test_dropna(self): ) tm.assert_series_equal( Series([True, True, False, None]).value_counts(dropna=False), - Series([2, 1, 1], index=[True, False, np.nan]), + Series([2, 1, 1], index=[True, np.nan, False]), ) tm.assert_series_equal( Series([10.3, 5.0, 5.0]).value_counts(dropna=True), @@ -1155,7 +1155,7 @@ def test_dropna(self): # 32-bit linux has a different ordering if IS64: result = Series([10.3, 5.0, 5.0, None]).value_counts(dropna=False) - expected = Series([2, 1, 1], index=[5.0, 10.3, np.nan]) + expected = Series([2, 1, 1], index=[5.0, np.nan, 10.3]) tm.assert_series_equal(result, expected) def test_value_counts_normalized(self): From 
0b24c3e2c12288ca780f8d708325181388a134dd Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 10:32:49 +0300 Subject: [PATCH 16/71] CLN: remove unnecessary is_bool import in series.py --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index f4c76b23911d6..148e91a2f3521 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -47,7 +47,6 @@ ) from pandas.core.dtypes.common import ( ensure_platform_int, - is_bool, is_categorical_dtype, is_dict_like, is_extension_array_dtype, From 1b98bffdd86c99f9bfcc2315414d397e3f20fce6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 10:52:27 +0300 Subject: [PATCH 17/71] TST: value_counts NaN dupe order change in test_string.py --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 56a8e21edd004..089bbcf4e0e3f 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -301,7 +301,7 @@ def test_arrow_roundtrip(): def test_value_counts_na(): arr = pd.array(["a", "b", "a", pd.NA], dtype="string") result = arr.value_counts(dropna=False) - expected = pd.Series([2, 1, 1], index=["a", "b", pd.NA], dtype="Int64") + expected = pd.Series([2, 1, 1], index=["a", pd.NA, "b"], dtype="Int64") tm.assert_series_equal(result, expected) result = arr.value_counts(dropna=True) From 4076f0cc4b7f8055ca34345bd4e0af0afbd51687 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 11:05:42 +0300 Subject: [PATCH 18/71] TST: value_counts NaN dupe order in test_value_counts.py --- pandas/tests/base/test_value_counts.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index 7d783e7a2cd8b..f0d190361619d 100644 --- 
a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -239,6 +239,11 @@ def test_value_counts_datetime64(index_or_series): tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) + # GH 35922. NaN-like now sorts to the beginning of duplicate counts + idx = pd.to_datetime( + ["2010-01-01 00:00:00", "2008-09-09 00:00:00", pd.NaT, "2009-01-01 00:00:00"] + ) + expected_s = Series([3, 2, 1, 1], index=idx) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) From 08aadd3c6ebcc7f1e37cfe6aebbde777c87f56ad Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 11:06:30 +0300 Subject: [PATCH 19/71] CLN: rm unnecessary assignment from test_value_counts --- pandas/tests/base/test_value_counts.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/base/test_value_counts.py b/pandas/tests/base/test_value_counts.py index f0d190361619d..98a52148cab4f 100644 --- a/pandas/tests/base/test_value_counts.py +++ b/pandas/tests/base/test_value_counts.py @@ -244,7 +244,6 @@ def test_value_counts_datetime64(index_or_series): ["2010-01-01 00:00:00", "2008-09-09 00:00:00", pd.NaT, "2009-01-01 00:00:00"] ) expected_s = Series([3, 2, 1, 1], index=idx) - expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() From 6f904e6c411b5c8a19f809dd647f03ef249ee091 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 11:21:44 +0300 Subject: [PATCH 20/71] TST: expect stable sort in extension/base/methods.py --- pandas/tests/extension/base/methods.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 23e20a2c0903a..69ba2c6fbed5a 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -124,8 +124,9 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key): ser = pd.Series(data_for_sorting) result = 
ser.sort_values(ascending=ascending, key=sort_by_key) expected = ser.iloc[[2, 0, 1]] + # GH 35922. Expect stable sort. if not ascending: - expected = expected[::-1] + expected = ser.iloc[[0, 1, 2]] self.assert_series_equal(result, expected) From 5c7eea94da84fc2426f83776cdd36e4c1654accf Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 12:24:07 +0300 Subject: [PATCH 21/71] BUG: support objs that raise when cast to their class --- pandas/core/sorting.py | 5 ++++- pandas/tests/extension/base/methods.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 49b4aa91b2b36..e2552c2ea5c3b 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -508,7 +508,10 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): result = Index(result) else: type_of_values = type(values) - result = type_of_values(result) # try to revert to original type otherwise + # GH 35922. Support sorting objects that raise when cast to their type + if not isinstance(result, type_of_values): + # try to revert to original type otherwise + result = type_of_values(result) except TypeError: raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 69ba2c6fbed5a..54c50c8291713 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -126,7 +126,7 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key): expected = ser.iloc[[2, 0, 1]] # GH 35922. Expect stable sort. 
if not ascending: - expected = ser.iloc[[0, 1, 2]] + expected = ser.iloc[[1, 0, 2]] self.assert_series_equal(result, expected) From 75aad121b7c759591610ce4312457583fab5cb16 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 12:26:51 +0300 Subject: [PATCH 22/71] TST: fix stable sort expectation in test_sort_values in methods.py --- pandas/tests/extension/base/methods.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 54c50c8291713..e973b1247941f 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -124,9 +124,12 @@ def test_sort_values(self, data_for_sorting, ascending, sort_by_key): ser = pd.Series(data_for_sorting) result = ser.sort_values(ascending=ascending, key=sort_by_key) expected = ser.iloc[[2, 0, 1]] - # GH 35922. Expect stable sort. if not ascending: - expected = ser.iloc[[1, 0, 2]] + # GH 35922. Expect stable sort + if ser.nunique() == 2: + expected = ser.iloc[[0, 1, 2]] + else: + expected = ser.iloc[[1, 0, 2]] self.assert_series_equal(result, expected) From e503dca1d6e489943edd0a0cd6891e2f2d0e7390 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 14:14:48 +0300 Subject: [PATCH 23/71] TST: change top expect for dupe counts in frame/test_describe --- pandas/tests/frame/methods/test_describe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/methods/test_describe.py b/pandas/tests/frame/methods/test_describe.py index d10d4c8ea05ab..b332146de150a 100644 --- a/pandas/tests/frame/methods/test_describe.py +++ b/pandas/tests/frame/methods/test_describe.py @@ -56,7 +56,7 @@ def test_describe_bool_frame(self): ) result = df.describe() expected = DataFrame( - {"bool_data_1": [4, 2, True, 2], "bool_data_2": [4, 2, True, 3]}, + {"bool_data_1": [4, 2, False, 2], "bool_data_2": [4, 2, True, 3]}, index=["count", "unique", "top", 
"freq"], ) tm.assert_frame_equal(result, expected) @@ -79,7 +79,7 @@ def test_describe_bool_frame(self): ) result = df.describe() expected = DataFrame( - {"bool_data": [4, 2, True, 2], "str_data": [4, 3, "a", 2]}, + {"bool_data": [4, 2, False, 2], "str_data": [4, 3, "a", 2]}, index=["count", "unique", "top", "freq"], ) tm.assert_frame_equal(result, expected) From 759de34819c51f95fa2903c7e88fa9c08c94e6ee Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 17:46:01 +0300 Subject: [PATCH 24/71] BUG: clean up crutches in Series.nlargest Now that sort_values is stable, we don't need to reverse order in algorithms.SelectNSeries.compute, which is the backend of Series.nlargest --- pandas/core/algorithms.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 9a3144d1ccbaa..b0889f7b681f3 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -1181,10 +1181,8 @@ def compute(self, method: str) -> Series: # slow method if n >= len(self.obj): - reverse_it = self.keep == "last" or method == "nlargest" ascending = method == "nsmallest" - slc = np.s_[::-1] if reverse_it else np.s_[:] - return dropped[slc].sort_values(ascending=ascending).head(n) + return dropped.sort_values(ascending=ascending).head(n) # fast method arr, pandas_dtype = _ensure_data(dropped.values) From 12741f29f7f2ede9b08127c29d5cc679602d4bba Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 17:55:54 +0300 Subject: [PATCH 25/71] TST: change dupe order expect in frame/methods/test_value_counts --- pandas/tests/frame/methods/test_value_counts.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index c409b0bbe6fa9..23f9ebdb4479d 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -48,7 +48,7 @@ def 
test_data_frame_value_counts_default(): expected = pd.Series( data=[2, 1, 1], index=pd.MultiIndex.from_arrays( - [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] ), ) @@ -65,7 +65,7 @@ def test_data_frame_value_counts_normalize(): expected = pd.Series( data=[0.5, 0.25, 0.25], index=pd.MultiIndex.from_arrays( - [(4, 6, 2), (0, 0, 2)], names=["num_legs", "num_wings"] + [(4, 2, 6), (0, 2, 0)], names=["num_legs", "num_wings"] ), ) @@ -78,7 +78,7 @@ def test_data_frame_value_counts_single_col_default(): result = df.value_counts() expected = pd.Series( data=[2, 1, 1], - index=pd.MultiIndex.from_arrays([[4, 6, 2]], names=["num_legs"]), + index=pd.MultiIndex.from_arrays([[4, 2, 6]], names=["num_legs"]), ) tm.assert_series_equal(result, expected) From d4339521eb8db9e7939dbcc52b6955feeaf84609 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:06:40 +0300 Subject: [PATCH 26/71] TST: change dupe order expectation in indexes/datetimes/test_ops --- pandas/tests/indexes/datetimes/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index ada4902f6900b..9b7447758d2cc 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -249,7 +249,7 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0]) + exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None From dc906df3302a1b7426e40ccdbabc6e702ef44b14 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:10:17 +0300 Subject: [PATCH 27/71] TST: specify na-position in indexes/datetimes/test_ops.py --- 
pandas/tests/indexes/datetimes/test_ops.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_ops.py b/pandas/tests/indexes/datetimes/test_ops.py index 9b7447758d2cc..8445ed7f7bc2a 100644 --- a/pandas/tests/indexes/datetimes/test_ops.py +++ b/pandas/tests/indexes/datetimes/test_ops.py @@ -231,7 +231,7 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) index = DatetimeIndex(index_dates, tz=tz, name="idx") expected = DatetimeIndex(expected_dates, tz=tz, name="idx") - ordered = index.sort_values() + ordered = index.sort_values(na_position="first") tm.assert_index_equal(ordered, expected) assert ordered.freq is None @@ -239,7 +239,7 @@ def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None - ordered, indexer = index.sort_values(return_indexer=True) + ordered, indexer = index.sort_values(return_indexer=True, na_position="first") tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) From dbf295e78a1d748827236dc7e209b25a15b23955 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:35:03 +0300 Subject: [PATCH 28/71] TST: specify na-position in indexes/period/test_ops.py --- pandas/tests/indexes/period/test_ops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 74ca6ec59736b..46292ef6c35e2 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -178,7 +178,7 @@ def _check_freq(index, expected_index): pidx = PeriodIndex(["2011", "2013", "NaT", "2011"], name="pidx", freq="D") - result = pidx.sort_values() + result = pidx.sort_values(na_position="first") expected = PeriodIndex(["NaT", "2011", "2011", "2013"], name="pidx", freq="D") tm.assert_index_equal(result, expected) assert result.freq == "D" 
@@ -247,7 +247,7 @@ def test_order(self): ) for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]: - ordered = idx.sort_values() + ordered = idx.sort_values(na_position="first") tm.assert_index_equal(ordered, expected) assert ordered.freq == "D" @@ -255,7 +255,7 @@ def test_order(self): tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq == "D" - ordered, indexer = idx.sort_values(return_indexer=True) + ordered, indexer = idx.sort_values(return_indexer=True, na_position="first") tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) @@ -265,7 +265,7 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0]) + exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq == "D" From cbe528e28aa6cd6bbff552364d96287fef9530dd Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:37:19 +0300 Subject: [PATCH 29/71] TST: remove xfail from test_order_stability_compat --- pandas/tests/indexes/period/test_ops.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/period/test_ops.py b/pandas/tests/indexes/period/test_ops.py index 46292ef6c35e2..10134b20e7d3e 100644 --- a/pandas/tests/indexes/period/test_ops.py +++ b/pandas/tests/indexes/period/test_ops.py @@ -332,12 +332,8 @@ def test_freq_setter_deprecated(self): idx.freq = pd.offsets.Day() -@pytest.mark.xfail(reason="Datetime-like sort_values currently unstable (GH 35922)") def test_order_stability_compat(): - # GH 35584. The new implementation of sort_values for Index.sort_values - # is stable when sorting in descending order. Datetime-like sort_values - # currently aren't stable. xfail should be removed after - # the implementations' behavior is synchronized (xref GH 35922) + # GH 35922. 
sort_values is stable both for normal and datetime-like Index pidx = PeriodIndex(["2011", "2013", "2015", "2012", "2011"], name="pidx", freq="A") iidx = Index([2011, 2013, 2015, 2012, 2011], name="idx") ordered1, indexer1 = pidx.sort_values(return_indexer=True, ascending=False) From 6658b73e34645b2793a190a23e614f3ec5f85f71 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:41:15 +0300 Subject: [PATCH 30/71] TST: change dupe order expect for indexes/timedeltas/test_ops --- pandas/tests/indexes/timedeltas/test_ops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/timedeltas/test_ops.py b/pandas/tests/indexes/timedeltas/test_ops.py index c4429137d17f0..eee2c918eaeb1 100644 --- a/pandas/tests/indexes/timedeltas/test_ops.py +++ b/pandas/tests/indexes/timedeltas/test_ops.py @@ -134,7 +134,7 @@ def test_order(self): ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) - exp = np.array([2, 1, 3, 4, 0]) + exp = np.array([2, 1, 3, 0, 4]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None From 076fa7a12c070293fe9352447c9376dcf88f5be5 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 18:51:18 +0300 Subject: [PATCH 31/71] BUG: reintroduce ascending param error-catching to sort_values --- pandas/core/series.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 148e91a2f3521..161ee5e9cfc3a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -47,6 +47,7 @@ ) from pandas.core.dtypes.common import ( ensure_platform_int, + is_bool, is_categorical_dtype, is_dict_like, is_extension_array_dtype, @@ -3273,6 +3274,16 @@ def sort_values( "sort in-place you must create a copy" ) + if is_list_like(ascending): + if len(ascending) != 1: + raise ValueError( + f"Length of ascending ({len(ascending)}) must be 1 for Series" + ) + 
ascending = ascending[0] + + if not is_bool(ascending): + raise ValueError("ascending must be boolean") + arr = self._values # GH 35922. Make sorting stable by leveraging nargsort From cdf63a3b9a80edf061682d80ae64d05d82fc30f6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 19:05:13 +0300 Subject: [PATCH 32/71] BUG: reintroduce proper key func support to Series.sort_values --- pandas/core/series.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 161ee5e9cfc3a..33eb85eea1861 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -92,7 +92,7 @@ from pandas.core.indexing import check_bool_indexer from pandas.core.internals import SingleBlockManager from pandas.core.shared_docs import _shared_docs -from pandas.core.sorting import nargsort +from pandas.core.sorting import ensure_key_mapped, nargsort from pandas.core.strings import StringMethods from pandas.core.tools.datetimes import to_datetime @@ -3286,8 +3286,14 @@ def sort_values( arr = self._values + if key: + bad = isna(arr) + good = ~isna(arr) + + arr = np.concatenate([arr[bad], ensure_key_mapped(self[good], key)]) + # GH 35922. 
Make sorting stable by leveraging nargsort - sorted_index = nargsort(arr, kind, ascending, na_position, key) + sorted_index = nargsort(arr, kind, ascending, na_position) result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) From dd30ec99c93f2a7a8725da0885d27da3550fa813 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 19:09:55 +0300 Subject: [PATCH 33/71] BUG: fix bug in key func support --- pandas/core/series.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 33eb85eea1861..6af93edb7c5ff 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3289,8 +3289,7 @@ def sort_values( if key: bad = isna(arr) good = ~isna(arr) - - arr = np.concatenate([arr[bad], ensure_key_mapped(self[good], key)]) + arr[good] = ensure_key_mapped(self[good], key) # GH 35922. Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position) From 5cac5c75fac03763622fc54e5ef439021d5d67d2 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 22 Oct 2020 19:16:38 +0300 Subject: [PATCH 34/71] TST: alter dupe order expect in series/methods/test_value_counts --- pandas/tests/series/methods/test_value_counts.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/series/methods/test_value_counts.py b/pandas/tests/series/methods/test_value_counts.py index 37da31fb2329a..93a2c3c27e2ae 100644 --- a/pandas/tests/series/methods/test_value_counts.py +++ b/pandas/tests/series/methods/test_value_counts.py @@ -185,7 +185,7 @@ def test_value_counts_categorical_with_nan(self): ( Series([False, True, True, pd.NA]), False, - Series([2, 1, 1], index=[True, False, pd.NA]), + Series([2, 1, 1], index=[True, pd.NA, False]), ), ( Series([False, True, True, pd.NA]), @@ -195,7 +195,7 @@ def test_value_counts_categorical_with_nan(self): ( Series(range(3), index=[True, False, np.nan]).index, False, - Series([1, 1, 1], 
index=[True, False, pd.NA]), + Series([1, 1, 1], index=[pd.NA, False, True]), ), ], ) From 7a68a45c1e9bc6feafdc7925750b9f3aeb452974 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 10:25:41 +0300 Subject: [PATCH 35/71] CLN: remove unused variable in Series.sort_values --- pandas/core/series.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 6af93edb7c5ff..c3fb668f98788 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3287,7 +3287,6 @@ def sort_values( arr = self._values if key: - bad = isna(arr) good = ~isna(arr) arr[good] = ensure_key_mapped(self[good], key) From 4c72dbf561965bbda53e8928354a4d803cbb5972 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 11:54:15 +0300 Subject: [PATCH 36/71] BUG: set values in key func support in Series.sort_values --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index c3fb668f98788..ef32f2fedd2ce 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3288,7 +3288,7 @@ def sort_values( if key: good = ~isna(arr) - arr[good] = ensure_key_mapped(self[good], key) + arr[good] = ensure_key_mapped(self[good], key).values # GH 35922. 
Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position) From 359959171aaa1379265f45d02b9c1b1ce5415908 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 12:16:18 +0300 Subject: [PATCH 37/71] DOC: add whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index dfd2b47da4ed2..4ddf8d5a7851a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -527,6 +527,7 @@ Other - Fixed metadata propagation in the :class:`Series.dt` and :class:`Series.str` accessors and :class:`DataFrame.duplicated` and ::class:`DataFrame.stack` methods (:issue:`28283`) - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) +- Sorting in descending order being unstable when using :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses (:issue:`35992`) .. 
--------------------------------------------------------------------------- From d63293c2a0760c5f319dd565ba1986459da3edfa Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 14:18:09 +0300 Subject: [PATCH 38/71] BUG: add SparseArray sorting with key func support --- pandas/core/series.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index ef32f2fedd2ce..3216e8786e585 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -72,7 +72,7 @@ from pandas.core.aggregation import aggregate, transform from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor -from pandas.core.arrays.sparse import SparseAccessor +from pandas.core.arrays.sparse import SparseAccessor, SparseArray import pandas.core.common as com from pandas.core.construction import ( array as pd_array, @@ -3287,8 +3287,12 @@ def sort_values( arr = self._values if key: - good = ~isna(arr) - arr[good] = ensure_key_mapped(self[good], key).values + if isinstance(arr, SparseArray): + # SparseArray doesn't store NaNs item-by-item, so pass everything + arr = ensure_key_mapped(self, key)._values + else: + good = ~isna(arr) + arr[good] = ensure_key_mapped(self[good], key)._values # GH 35922. 
Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position) From a5c8f65951ed4dc53758da06527281e5abb28bc7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 14:34:56 +0300 Subject: [PATCH 39/71] TST: remove datetime-like xfails when testing indices with missing --- pandas/tests/indexes/test_common.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 6a681ede8ff42..55f4446534e45 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -516,12 +516,7 @@ def test_sort_values_with_missing(index_with_missing, na_position): # GH 35584. Test that sort_values works with missing values, # sort non-missing and place missing according to na_position - if isinstance(index_with_missing, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # datetime-like indices will get na_position kwarg as part of - # synchronizing duplicate-sorting behavior, because we currently expect - # them, other indices, and Series to sort differently (xref 35922) - pytest.xfail("sort_values does not support na_position kwarg") - elif isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): + if isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): pytest.xfail("missing value sorting order not defined for index type") missing_count = np.sum(index_with_missing.isna()) From fc90ea9905b55369e50493e54d0894ced6fd0af1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 14:48:28 +0300 Subject: [PATCH 40/71] TST: fix expect dupe sort order in doctests --- pandas/core/base.py | 4 ++-- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 67621cf585793..86bccf7cdf328 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -933,9 +933,9 @@ def value_counts( >>> index = 
pd.Index([3, 1, 2, 3, 4, np.nan]) >>> index.value_counts() 3.0 2 - 4.0 1 - 2.0 1 1.0 1 + 2.0 1 + 4.0 1 dtype: int64 With `normalize` set to `True`, returns the relative frequency by diff --git a/pandas/core/frame.py b/pandas/core/frame.py index eb150dd87347f..91fbaa06661e3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5535,8 +5535,8 @@ def value_counts( >>> df.value_counts() num_legs num_wings 4 0 2 - 6 0 1 2 2 1 + 6 0 1 dtype: int64 >>> df.value_counts(sort=False) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index e000fe5fa733d..0b0c521336a19 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10108,7 +10108,7 @@ def describe( categorical count 3 unique 3 - top f + top d freq 1 Excluding numeric columns from a ``DataFrame`` description. From 408abe0138fc4474d17b398f186a727d65d098d9 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 14:49:31 +0300 Subject: [PATCH 41/71] BUG: fix bug in Series reconstruction after sorting --- pandas/core/series.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 3216e8786e585..05ff722731cb6 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3297,7 +3297,9 @@ def sort_values( # GH 35922. 
Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position) - result = self._constructor(arr[sorted_index], index=self.index[sorted_index]) + result = self._constructor( + self._values[sorted_index], index=self.index[sorted_index] + ) if ignore_index: result.index = ibase.default_index(len(sorted_index)) From 5f53cfc28a41a7322277e4e9cadc557ad01d7ecb Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 15:26:30 +0300 Subject: [PATCH 42/71] TST: fix expect dupe sort order in more doctests --- pandas/core/base.py | 4 ++-- pandas/core/frame.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 86bccf7cdf328..abbcf3be7e51e 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -944,9 +944,9 @@ def value_counts( >>> s = pd.Series([3, 1, 2, 3, 4, np.nan]) >>> s.value_counts(normalize=True) 3.0 0.4 - 4.0 0.2 - 2.0 0.2 1.0 0.2 + 2.0 0.2 + 4.0 0.2 dtype: float64 **bins** diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 91fbaa06661e3..c8506a9a6fb82 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -5556,8 +5556,8 @@ def value_counts( >>> df.value_counts(normalize=True) num_legs num_wings 4 0 0.50 - 6 0 0.25 2 2 0.25 + 6 0 0.25 dtype: float64 """ if subset is None: From 4370f270ddf336f093ce1d37f47d7635b4391848 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 15:27:28 +0300 Subject: [PATCH 43/71] BUG: support key func changing ndarray dtype --- pandas/core/series.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 05ff722731cb6..bc9173be75dc1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3292,6 +3292,8 @@ def sort_values( arr = ensure_key_mapped(self, key)._values else: good = ~isna(arr) + keyed = ensure_key_mapped(self[good], key)._values + arr = arr.astype(keyed.dtype) arr[good] = ensure_key_mapped(self[good], key)._values 
# GH 35922. Make sorting stable by leveraging nargsort From 6099344dd1d9ad5fb3decbd4a46d87b233032079 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 15:33:24 +0300 Subject: [PATCH 44/71] REFACT: reuse keyed --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index bc9173be75dc1..5d27879f10739 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3294,7 +3294,7 @@ def sort_values( good = ~isna(arr) keyed = ensure_key_mapped(self[good], key)._values arr = arr.astype(keyed.dtype) - arr[good] = ensure_key_mapped(self[good], key)._values + arr[good] = keyed # GH 35922. Make sorting stable by leveraging nargsort sorted_index = nargsort(arr, kind, ascending, na_position) From bfa2b2865d49ac022a46e0a9e91efff69d3db071 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 15:44:13 +0300 Subject: [PATCH 45/71] TST: remove datetime-like xfails from invalid_na_position --- pandas/tests/indexes/test_common.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 55f4446534e45..c97b2750bea63 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -498,12 +498,7 @@ def test_ravel_deprecation(self, index): @pytest.mark.parametrize("na_position", [None, "middle"]) def test_sort_values_invalid_na_position(index_with_missing, na_position): - if isinstance(index_with_missing, (DatetimeIndex, PeriodIndex, TimedeltaIndex)): - # datetime-like indices will get na_position kwarg as part of - # synchronizing duplicate-sorting behavior, because we currently expect - # them, other indices, and Series to sort differently (xref 35922) - pytest.xfail("sort_values does not support na_position kwarg") - elif isinstance(index_with_missing, (CategoricalIndex, MultiIndex)): + if isinstance(index_with_missing, (CategoricalIndex, 
MultiIndex)): pytest.xfail("missing value sorting order not defined for index type") if na_position not in ["first", "last"]: From bc004eca450ebf809062bc1124578ce89c6d8009 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 15:54:51 +0300 Subject: [PATCH 46/71] TST: fix expect dupe sort order in more doctests in base.py --- pandas/core/base.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index abbcf3be7e51e..b5470592d0fd0 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -957,8 +957,8 @@ def value_counts( number of half-open bins. >>> s.value_counts(bins=3) - (2.0, 3.0] 2 (0.996, 2.0] 2 + (2.0, 3.0] 2 (3.0, 4.0] 1 dtype: int64 @@ -968,10 +968,10 @@ def value_counts( >>> s.value_counts(dropna=False) 3.0 2 - NaN 1 - 4.0 1 - 2.0 1 1.0 1 + 2.0 1 + 4.0 1 + NaN 1 dtype: int64 """ result = value_counts( From cd7111ef62d2a0866f1f9b631b33ed6ddd0e514e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 16:17:17 +0300 Subject: [PATCH 47/71] CLN: remove unnecessary imports in indexes/test_common.py --- pandas/tests/indexes/test_common.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index c97b2750bea63..68d728783e6a6 100644 --- a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -15,11 +15,8 @@ import pandas as pd from pandas import ( CategoricalIndex, - DatetimeIndex, MultiIndex, - PeriodIndex, RangeIndex, - TimedeltaIndex, ) import pandas._testing as tm From 5fbbf7d03f1af670c0f3b5f1670690a32915b9d8 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Fri, 23 Oct 2020 16:26:22 +0300 Subject: [PATCH 48/71] CLN: run black --- pandas/tests/indexes/test_common.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/tests/indexes/test_common.py b/pandas/tests/indexes/test_common.py index 68d728783e6a6..d47582566fe94 100644 --- 
a/pandas/tests/indexes/test_common.py +++ b/pandas/tests/indexes/test_common.py @@ -13,11 +13,7 @@ from pandas.core.dtypes.common import is_period_dtype, needs_i8_conversion import pandas as pd -from pandas import ( - CategoricalIndex, - MultiIndex, - RangeIndex, -) +from pandas import CategoricalIndex, MultiIndex, RangeIndex import pandas._testing as tm From 2946e468652776561edce65ed1648b58e48a79b6 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 10:22:11 +0300 Subject: [PATCH 49/71] try removing if in ensure_key_mapped to find tests --- pandas/core/sorting.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e2552c2ea5c3b..004c1296b0a3e 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -508,10 +508,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): result = Index(result) else: type_of_values = type(values) - # GH 35922. Support sorting objects that raise when cast to their type - if not isinstance(result, type_of_values): - # try to revert to original type otherwise - result = type_of_values(result) + result = type_of_values(result) except TypeError: raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ From 488596ccbce3d0a54cb5b4d5fb280d897119b0ba Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 10:53:58 +0300 Subject: [PATCH 50/71] BUG: just apply key func to the whole Series in sort_values --- pandas/core/series.py | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 51fdf09c8e71b..351a465e3f0ba 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3284,20 +3284,12 @@ def sort_values( if not is_bool(ascending): raise ValueError("ascending must be boolean") - arr = self._values - - if key: - if isinstance(arr, SparseArray): - # SparseArray doesn't store NaNs item-by-item, so 
pass everything - arr = ensure_key_mapped(self, key)._values - else: - good = ~isna(arr) - keyed = ensure_key_mapped(self[good], key)._values - arr = arr.astype(keyed.dtype) - arr[good] = keyed - # GH 35922. Make sorting stable by leveraging nargsort - sorted_index = nargsort(arr, kind, ascending, na_position) + if key: + ser = ensure_key_mapped(self, key) + sorted_index = nargsort(ser._values, kind, ascending, na_position) + else: + sorted_index = nargsort(self._values, kind, ascending, na_position) result = self._constructor( self._values[sorted_index], index=self.index[sorted_index] From c8cd8cddba1b281207519a399c67f9e2f2e16fb7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 11:12:43 +0300 Subject: [PATCH 51/71] remove legacy try/except with default to quicksort --- pandas/core/sorting.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 004c1296b0a3e..6d1a8950bbf67 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -375,14 +375,7 @@ def nargsort( non_nans = non_nans[::-1] non_nan_idx = non_nan_idx[::-1] - # GH 35922. 
Move support for object sort here from Series.sort_values - try: - # if kind==mergesort, it can fail for object dtype - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - except TypeError: - # stable sort not available for object dtype - # uses the argsort default quicksort - indexer = non_nan_idx[non_nans.argsort(kind="quicksort")] + indexer = non_nan_idx[non_nans.argsort(kind=kind)] if not ascending: indexer = indexer[::-1] From ab71697f630bc30ce3930e78bb8c3e8c5065134b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 11:20:20 +0300 Subject: [PATCH 52/71] DOC: expand whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 9a55abdeea54f..22beb079dde00 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order being unstable when using :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, and will affect the index optionally returned by :meth:`Index.sort_values` (:issue:`35992`) .. 
--------------------------------------------------------------------------- From 05f60d5a1eba14dbd8b555d74e3b5787769a9a9f Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 11:39:46 +0300 Subject: [PATCH 53/71] CLN: remove unnecessary SparseArray import --- pandas/core/series.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 351a465e3f0ba..463b1d969265e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -72,7 +72,7 @@ from pandas.core.aggregation import aggregate, transform from pandas.core.arrays import ExtensionArray from pandas.core.arrays.categorical import CategoricalAccessor -from pandas.core.arrays.sparse import SparseAccessor, SparseArray +from pandas.core.arrays.sparse import SparseAccessor import pandas.core.common as com from pandas.core.construction import ( array as pd_array, From 8669e89e5c9f4558b0a01bdfe69389f798ef5434 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 12:47:24 +0300 Subject: [PATCH 54/71] restart tests From 00b454c421dda6f3e8f90be53be1d0e545f9d4f4 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 13:55:22 +0300 Subject: [PATCH 55/71] restart tests again (windows_np18 misbehaving) From 5d5f3d019f574a66c5e4f23c60578d57be22bb29 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 14:48:36 +0300 Subject: [PATCH 56/71] restart tests again From 3d7f47cce2d286601a34a472fdf6ac975db97a34 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 16:56:11 +0300 Subject: [PATCH 57/71] DOC: add information on sorting NaTs to whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 22beb079dde00..c74bdd18ec87d 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving 
differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, and will affect the index optionally returned by :meth:`Index.sort_values` (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns and will affect the index optionally returned by :meth:`Index.sort_values`. Default position of NaT values when sorting is now last both for ascending and descending sort and should be changed by supplying the ``na_position`` arg. This mirrors :meth:`Index.sort_values` for other :class:`Index` subclasses when sorting missing values. (:issue:`35992`) .. 
--------------------------------------------------------------------------- From 351a0033f82cef5eb1dc751538d195f7f4a68f65 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Tue, 27 Oct 2020 17:13:32 +0300 Subject: [PATCH 58/71] DOC: phrasing change in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index c74bdd18ec87d..2bc6af7ca925f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns and will affect the index optionally returned by :meth:`Index.sort_values`. Default position of NaT values when sorting is now last both for ascending and descending sort and should be changed by supplying the ``na_position`` arg. This mirrors :meth:`Index.sort_values` for other :class:`Index` subclasses when sorting missing values. (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns and will affect the index optionally returned by :meth:`Index.sort_values`. 
Default position of NaT values when sorting is now last both for ascending and descending sort and should be changed by supplying the ``na_position`` arg. This mirrors :meth:`Index.sort_values` behavior for other :class:`Index` subclasses when sorting missing values. (:issue:`35992`) .. --------------------------------------------------------------------------- From 18bb14169c5eedaf108d3e38f825298b028f8ea0 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:08:49 +0300 Subject: [PATCH 59/71] DOC: clarify whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 2bc6af7ca925f..07f97de3dc128 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns and will affect the index optionally returned by :meth:`Index.sort_values`. Default position of NaT values when sorting is now last both for ascending and descending sort and should be changed by supplying the ``na_position`` arg. This mirrors :meth:`Index.sort_values` behavior for other :class:`Index` subclasses when sorting missing values. 
(:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, and the index optionally returned by :meth:`Index.sort_values`. Default position of missing values is now last in the list of duplicates when using :meth:`Series.value_counts`, corresponding to the default in :meth:`Series.sort_values` and :meth:`Index.sort_values`. (:issue:`35992`) .. --------------------------------------------------------------------------- From 9b9730235b42a6a6024af0bd136e25c09dd7bd0a Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:18:12 +0300 Subject: [PATCH 60/71] CLN: clean up unnecessary newlines in sorting.py Also bring back the cast comment --- pandas/core/sorting.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 6d1a8950bbf67..e2ec14f4de9d4 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -374,9 +374,7 @@ def nargsort( if not ascending: non_nans = non_nans[::-1] non_nan_idx = non_nan_idx[::-1] - indexer = non_nan_idx[non_nans.argsort(kind=kind)] - if not ascending: indexer = indexer[::-1] # Finally, place the NaNs at the end or the beginning according to @@ -501,7 +499,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): result = Index(result) else: type_of_values = type(values) - result = type_of_values(result) + result = type_of_values(result) # try to revert to original type otherwise except TypeError: raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ From 3a88ebe39705971020bda585d1c36913e4799d95 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:22:22 +0300 Subject: [PATCH 61/71] DOC: clarify whatsnew some more --- 
doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 07f97de3dc128..ba9d540685de3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, and the index optionally returned by :meth:`Index.sort_values`. Default position of missing values is now last in the list of duplicates when using :meth:`Series.value_counts`, corresponding to the default in :meth:`Series.sort_values` and :meth:`Index.sort_values`. (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. (:issue:`35992`) .. 
--------------------------------------------------------------------------- From d41789e2bd2b53b60fe68004130886492e92c903 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:26:53 +0300 Subject: [PATCH 62/71] bring back na_position validation in Series.sort_values --- pandas/core/series.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/series.py b/pandas/core/series.py index 463b1d969265e..05855c0c27c0e 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3284,6 +3284,9 @@ def sort_values( if not is_bool(ascending): raise ValueError("ascending must be boolean") + if na_position not in ["first", "last"]: + raise ValueError(f"invalid na_position: {na_position}") + # GH 35922. Make sorting stable by leveraging nargsort if key: ser = ensure_key_mapped(self, key) From 812f312fdba577d3e00021ebecd2ea972698c89e Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:30:39 +0300 Subject: [PATCH 63/71] DOC: add to whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ba9d540685de3..f647013650168 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. 
This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs are now sorted according to the ``na_position`` argument, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`) .. 
--------------------------------------------------------------------------- From e28ce4d52375d3d946c30ad1e1d8b7dde37c31c2 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:33:57 +0300 Subject: [PATCH 64/71] DOC: clarify NaTs sorting changes in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index f647013650168..23182d379f302 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -535,7 +535,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs are now sorted according to the ``na_position`` argument, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`) +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. 
This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beginning. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`) .. --------------------------------------------------------------------------- From 0719633ae8aa5878c34a97ffdb2a66e22cc0b3af Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:41:53 +0300 Subject: [PATCH 65/71] DOC: add other api changes to whatsnew; move doc there --- doc/source/whatsnew/v1.2.0.rst | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 23182d379f302..a052f895518db 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -305,6 +305,13 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. +.. _whatsnew_200.api.other + +Other API changes +^^^^^^^^^^^^^^^^^ + +- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`.
When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beginning. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`) + .. --------------------------------------------------------------------------- .. _whatsnew_120.deprecations: @@ -535,7 +542,6 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError``, from a bare ``Exception`` previously (:issue:`35744`) - Bug in ``accessor.DirNamesMixin``, where ``dir(obj)`` wouldn't show attributes defined on the instance (:issue:`37173`). -- Sorting in descending order is now stable for :meth:`Series.sort_values` and :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses. This will affect sort order when sorting :class:`DataFrame` on multiple columns, sorting with a key function that produces duplicates, or requesting the sorting index when using :meth:`Index.sort_values`. When using :meth:`Series.value_counts`, count of missing values is no longer the last in the list of duplicate counts, and its position corresponds to the position in the original :class:`Series`. When using :meth:`Index.sort_values` for DateTime-like :class:`Index` subclasses, NaTs ignored the ``na_position`` argument and were sorted to the beginning. Now they respect ``na_position``, the default being ``last``, same as other :class:`Index` subclasses. (:issue:`35992`) ..
--------------------------------------------------------------------------- From 61ac60d6fc0cb2bb639d922cbc8af11588b56036 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Wed, 28 Oct 2020 10:42:56 +0300 Subject: [PATCH 66/71] CLN: run black --- pandas/core/sorting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index e2ec14f4de9d4..1132234ae7f8d 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -499,7 +499,7 @@ def ensure_key_mapped(values, key: Optional[Callable], levels=None): result = Index(result) else: type_of_values = type(values) - result = type_of_values(result) # try to revert to original type otherwise + result = type_of_values(result) # try to revert to original type otherwise except TypeError: raise TypeError( f"User-provided `key` function returned an invalid type {type(result)} \ From 36932cd5b4376b5f3459f5acc39b2e3d287ebf8c Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 29 Oct 2020 11:55:12 +0300 Subject: [PATCH 67/71] DOC: attempt fixing malformed link in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 051c0ee4602e3..ee028291affcd 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -321,7 +321,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) -- Deprecated parameter ``dtype`` in :meth:`~Index.copy` on method all index classes. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`) +- Deprecated parameter ``dtype`` in :meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype (:issue:`35853`) - Deprecated parameters ``levels`` and ``codes`` in :meth:`~MultiIndex.copy`. 
Use the :meth:`~MultiIndex.set_levels` and :meth:`~MultiIndex.set_codes` methods instead (:issue:`36685`) - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) - :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`) From 2156c64fe9770916fe18b5f528c33e6924877aa1 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 29 Oct 2020 11:56:25 +0300 Subject: [PATCH 68/71] Revert "DOC: attempt fixing malformed link in whatsnew" This reverts commit 36932cd5b4376b5f3459f5acc39b2e3d287ebf8c. --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ee028291affcd..051c0ee4602e3 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -321,7 +321,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`) -- Deprecated parameter ``dtype`` in :meth:`Index.copy` on method all index classes. Use the :meth:`Index.astype` method instead for changing dtype (:issue:`35853`) +- Deprecated parameter ``dtype`` in :meth:`~Index.copy` on method all index classes. Use the :meth:`~Index.astype` method instead for changing dtype (:issue:`35853`) - Deprecated parameters ``levels`` and ``codes`` in :meth:`~MultiIndex.copy`. 
Use the :meth:`~MultiIndex.set_levels` and :meth:`~MultiIndex.set_codes` methods instead (:issue:`36685`) - Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`) - :meth:`DataFrame.lookup` is deprecated and will be removed in a future version, use :meth:`DataFrame.melt` and :meth:`DataFrame.loc` instead (:issue:`18682`) From e6f5741c25d71ea4f55917a115d6327e9d8dd90b Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 29 Oct 2020 12:46:49 +0300 Subject: [PATCH 69/71] DOC: fix broken link in whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 051c0ee4602e3..967efa11da331 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -307,7 +307,7 @@ Optional libraries below the lowest tested version may still work, but are not c See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more. -.. _whatsnew_200.api.other +.. 
_whatsnew_200.api.other: Other API changes ^^^^^^^^^^^^^^^^^ From c8230433c749859db936c9f651ebb0c27804add2 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Thu, 29 Oct 2020 17:13:08 +0300 Subject: [PATCH 70/71] restart tests From 37a643985713198e7096fe594b1463d1e340ccd7 Mon Sep 17 00:00:00 2001 From: Alexander Kirko Date: Sat, 31 Oct 2020 07:48:21 +0300 Subject: [PATCH 71/71] REFACT: clean up key if/else in Series.sort_values --- pandas/core/series.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/core/series.py b/pandas/core/series.py index 05855c0c27c0e..556e9b20424bb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3288,11 +3288,8 @@ def sort_values( raise ValueError(f"invalid na_position: {na_position}") # GH 35922. Make sorting stable by leveraging nargsort - if key: - ser = ensure_key_mapped(self, key) - sorted_index = nargsort(ser._values, kind, ascending, na_position) - else: - sorted_index = nargsort(self._values, kind, ascending, na_position) + values_to_sort = ensure_key_mapped(self, key)._values if key else self._values + sorted_index = nargsort(values_to_sort, kind, ascending, na_position) result = self._constructor( self._values[sorted_index], index=self.index[sorted_index]