Skip to content

Commit

Permalink
DOC: Fix Series nsmallest and nlargest docstring/doctests (pandas-dev…
Browse files Browse the repository at this point in the history
  • Loading branch information
Moisan authored and aeltanawy committed Sep 20, 2018
1 parent 79b8763 commit 1aaefe5
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 45 deletions.
2 changes: 1 addition & 1 deletion ci/doctests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ if [ "$DOCTEST" ]; then
fi

pytest --doctest-modules -v pandas/core/series.py \
-k"-nlargest -nonzero -nsmallest -reindex -searchsorted -to_dict"
-k"-nonzero -reindex -searchsorted -to_dict"

if [ $? -ne "0" ]; then
RET=1
Expand Down
187 changes: 143 additions & 44 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2741,17 +2741,20 @@ def nlargest(self, n=5, keep='first'):
Parameters
----------
n : int
Return this many descending sorted values
keep : {'first', 'last'}, default 'first'
Where there are duplicate values:
- ``first`` : take the first occurrence.
- ``last`` : take the last occurrence.
n : int, default 5
Return this many descending sorted values.
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:
- ``first`` : take the first occurrences based on the index order
- ``last`` : take the last occurrences based on the index order
- ``all`` : keep all occurrences. This can result in a Series of
size larger than `n`.
Returns
-------
top_n : Series
The n largest values in the Series, in sorted order
Series
The `n` largest values in the Series, sorted in decreasing order.
Notes
-----
Expand All @@ -2760,23 +2763,70 @@ def nlargest(self, n=5, keep='first'):
See Also
--------
Series.nsmallest
Series.nsmallest: Get the `n` smallest elements.
Series.sort_values: Sort Series by values.
Series.head: Return the first `n` rows.
Examples
--------
>>> s = pd.Series(np.random.randn(10**6))
>>> s.nlargest(10) # only sorts up to the N requested
219921 4.644710
82124 4.608745
421689 4.564644
425277 4.447014
718691 4.414137
43154 4.403520
283187 4.313922
595519 4.273635
503969 4.250236
121637 4.240952
dtype: float64
>>> countries_population = {"Italy": 59000000, "France": 65000000,
... "Malta": 434000, "Maldives": 434000,
... "Brunei": 434000, "Iceland": 337000,
... "Nauru": 11300, "Tuvalu": 11300,
... "Anguilla": 11300, "Monserat": 5200}
>>> s = pd.Series(countries_population)
>>> s
Italy 59000000
France 65000000
Malta 434000
Maldives 434000
Brunei 434000
Iceland 337000
Nauru 11300
Tuvalu 11300
Anguilla 11300
Monserat 5200
dtype: int64
The `n` largest elements where ``n=5`` by default.
>>> s.nlargest()
France 65000000
Italy 59000000
Malta 434000
Maldives 434000
Brunei 434000
dtype: int64
The `n` largest elements where ``n=3``. Default `keep` value is 'first'
so Malta will be kept.
>>> s.nlargest(3)
France 65000000
Italy 59000000
Malta 434000
dtype: int64
The `n` largest elements where ``n=3`` and keeping the last duplicates.
Brunei will be kept since it is the last with value 434000 based on
the index order.
>>> s.nlargest(3, keep='last')
France 65000000
Italy 59000000
Brunei 434000
dtype: int64
The `n` largest elements where ``n=3`` with all duplicates kept. Note
that the returned Series has five elements due to the three duplicates.
>>> s.nlargest(3, keep='all')
France 65000000
Italy 59000000
Malta 434000
Maldives 434000
Brunei 434000
dtype: int64
"""
return algorithms.SelectNSeries(self, n=n, keep=keep).nlargest()

Expand All @@ -2786,17 +2836,20 @@ def nsmallest(self, n=5, keep='first'):
Parameters
----------
n : int
Return this many ascending sorted values
keep : {'first', 'last'}, default 'first'
Where there are duplicate values:
- ``first`` : take the first occurrence.
- ``last`` : take the last occurrence.
n : int, default 5
Return this many ascending sorted values.
keep : {'first', 'last', 'all'}, default 'first'
When there are duplicate values that cannot all fit in a
Series of `n` elements:
- ``first`` : take the first occurrences based on the index order
- ``last`` : take the last occurrences based on the index order
- ``all`` : keep all occurrences. This can result in a Series of
size larger than `n`.
Returns
-------
bottom_n : Series
The n smallest values in the Series, in sorted order
Series
The `n` smallest values in the Series, sorted in increasing order.
Notes
-----
Expand All @@ -2805,23 +2858,69 @@ def nsmallest(self, n=5, keep='first'):
See Also
--------
Series.nlargest
Series.nlargest: Get the `n` largest elements.
Series.sort_values: Sort Series by values.
Series.head: Return the first `n` rows.
Examples
--------
>>> s = pd.Series(np.random.randn(10**6))
>>> s.nsmallest(10) # only sorts up to the N requested
288532 -4.954580
732345 -4.835960
64803 -4.812550
446457 -4.609998
501225 -4.483945
669476 -4.472935
973615 -4.401699
621279 -4.355126
773916 -4.347355
359919 -4.331927
dtype: float64
>>> countries_population = {"Italy": 59000000, "France": 65000000,
... "Brunei": 434000, "Malta": 434000,
... "Maldives": 434000, "Iceland": 337000,
... "Nauru": 11300, "Tuvalu": 11300,
... "Anguilla": 11300, "Monserat": 5200}
>>> s = pd.Series(countries_population)
>>> s
Italy 59000000
France 65000000
Brunei 434000
Malta 434000
Maldives 434000
Iceland 337000
Nauru 11300
Tuvalu 11300
Anguilla 11300
Monserat 5200
dtype: int64
The `n` largest elements where ``n=5`` by default.
>>> s.nsmallest()
Monserat 5200
Nauru 11300
Tuvalu 11300
Anguilla 11300
Iceland 337000
dtype: int64
The `n` smallest elements where ``n=3``. Default `keep` value is
'first' so Nauru and Tuvalu will be kept.
>>> s.nsmallest(3)
Monserat 5200
Nauru 11300
Tuvalu 11300
dtype: int64
The `n` smallest elements where ``n=3`` and keeping the last
duplicates. Anguilla and Tuvalu will be kept since they are the last
with value 11300 based on the index order.
>>> s.nsmallest(3, keep='last')
Monserat 5200
Anguilla 11300
Tuvalu 11300
dtype: int64
The `n` smallest elements where ``n=3`` with all duplicates kept. Note
that the returned Series has four elements due to the three duplicates.
>>> s.nsmallest(3, keep='all')
Monserat 5200
Nauru 11300
Tuvalu 11300
Anguilla 11300
dtype: int64
"""
return algorithms.SelectNSeries(self, n=n, keep=keep).nsmallest()

Expand Down

0 comments on commit 1aaefe5

Please sign in to comment.