From 42d4e80290fc84fd0f6d9b55b57bf94da5719ec6 Mon Sep 17 00:00:00 2001 From: gfyoung Date: Wed, 7 Mar 2018 15:36:23 -0500 Subject: [PATCH] MAINT: Address reviewer comments --- doc/source/whatsnew/v0.23.0.txt | 2 +- pandas/tests/frame/test_rank.py | 76 ++++++++++++--------------------- 2 files changed, 28 insertions(+), 50 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 7e35b302a81100..8237f40181749e 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -905,7 +905,7 @@ Offsets Numeric ^^^^^^^ -- Bug in ``DataFrame.rank()`` and ``Series.rank()`` when ``method='dense'`` and ``pct=True`` (:issue:`15630`) +- Bug in :meth:`DataFrame.rank` and :meth:`Series.rank` when ``method='dense'`` and ``pct=True`` in which percentile ranks were not being used with the number of distinct observations (:issue:`15630`) - Bug in :class:`Series` constructor with an int or float list where specifying ``dtype=str``, ``dtype='str'`` or ``dtype='U'`` failed to convert the data elements to strings (:issue:`16605`) - Bug in :class:`Index` multiplication and division methods where operating with a ``Series`` would return an ``Index`` object instead of a ``Series`` object (:issue:`19042`) - Bug in the :class:`DataFrame` constructor in which data containing very large positive or very large negative numbers was causing ``OverflowError`` (:issue:`18584`) diff --git a/pandas/tests/frame/test_rank.py b/pandas/tests/frame/test_rank.py index dca82eea6ea898..b8ba408b547158 100644 --- a/pandas/tests/frame/test_rank.py +++ b/pandas/tests/frame/test_rank.py @@ -268,54 +268,32 @@ def _check2d(df, expected, method='average', axis=0): _check2d(frame, results[method], method=method, axis=axis) -# GH15630, pct should be on 100% basis when method='dense' -@pytest.mark.parametrize('frame, exp', [ - ([['2012', 'B', 3], ['2012', 'A', 2], ['2012', 'A', 1]], - [[1., 1., 1.], [1., 0.5, 2. / 3], [1., 0.5, 1. / 3]])]) -def test_rank_dense_pct(frame, exp): - df = DataFrame(frame) - result = df.rank(method='dense', pct=True) - expected = DataFrame(exp) - assert_frame_equal(result, expected) - - -@pytest.mark.parametrize('frame, exp', [ - ([['2012', 'B', 3], ['2012', 'A', 2], ['2012', 'A', 1]], - [[1. / 3, 1., 1.], [1. / 3, 1. / 3, 2. / 3], - [1. / 3, 1. / 3, 1. / 3]])]) -def test_rank_min_pct(frame, exp): - df = DataFrame(frame) - result = df.rank(method='min', pct=True) - expected = DataFrame(exp) - assert_frame_equal(result, expected) - - -@pytest.mark.parametrize('frame, exp', [ - ([['2012', 'B', 3], ['2012', 'A', 2], ['2012', 'A', 1]], - [[1., 1., 1.], [1., 2. / 3, 2. / 3], [1., 2. / 3, 1. / 3]])]) -def test_rank_max_pct(frame, exp): - df = DataFrame(frame) - result = df.rank(method='max', pct=True) - expected = DataFrame(exp) - assert_frame_equal(result, expected) - - -@pytest.mark.parametrize('frame, exp', [ - ([['2012', 'B', 3], ['2012', 'A', 2], ['2012', 'A', 1]], - [[2. / 3, 1., 1.], [2. / 3, 0.5, 2. / 3], [2. / 3, 0.5, 1. / 3]])]) -def test_rank_average_pct(frame, exp): - df = DataFrame(frame) - result = df.rank(method='average', pct=True) - expected = DataFrame(exp) - assert_frame_equal(result, expected) - +@pytest.mark.parametrize( + "method,exp", [("dense", + [[1., 1., 1.], + [1., 0.5, 2. / 3], + [1., 0.5, 1. / 3]]), + ("min", + [[1. / 3, 1., 1.], + [1. / 3, 1. / 3, 2. / 3], + [1. / 3, 1. / 3, 1. / 3]]), + ("max", + [[1., 1., 1.], + [1., 2. / 3, 2. / 3], + [1., 2. / 3, 1. / 3]]), + ("average", + [[2. / 3, 1., 1.], + [2. / 3, 0.5, 2. / 3], + [2. / 3, 0.5, 1. / 3]]), + ("first", + [[1. / 3, 1., 1.], + [2. / 3, 1. / 3, 2. / 3], + [3. / 3, 2. / 3, 1. / 3]])]) +def test_rank_pct_true(method, exp): + # see gh-15630. + + df = DataFrame([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]]) + result = df.rank(method=method, pct=True) -@pytest.mark.parametrize('frame, exp', [ - ([[2012, 66, 3], [2012, 65, 2], [2012, 65, 1]], - [[1. / 3, 1., 1.], [2. / 3, 1. / 3, 2. / 3], - [3. / 3, 2. / 3, 1. / 3]])]) -def test_rank_first_pct(frame, exp): - df = DataFrame(frame) - result = df.rank(method='first', pct=True) expected = DataFrame(exp) - assert_frame_equal(result, expected) + tm.assert_frame_equal(result, expected)