Skip to content

Commit

Permalink
Merge pull request #10411 from shoyer/max_distance
Browse files Browse the repository at this point in the history
ENH: tolerance argument for limiting pad, backfill and nearest neighbor reindexing
  • Loading branch information
shoyer committed Aug 18, 2015
2 parents 931e0e5 + 0468cad commit 5052900
Show file tree
Hide file tree
Showing 12 changed files with 328 additions and 56 deletions.
24 changes: 24 additions & 0 deletions doc/source/basics.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1100,6 +1100,30 @@ Note that the same result could have been achieved using
increasing or decreasing. :meth:`~Series.fillna` and :meth:`~Series.interpolate`
will not make any checks on the order of the index.

.. _basics.limits_on_reindex_fill:

Limits on filling while reindexing
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ``limit`` and ``tolerance`` arguments provide additional control over
filling while reindexing. Limit specifies the maximum count of consecutive
matches:

.. ipython:: python
ts2.reindex(ts.index, method='ffill', limit=1)
In contrast, tolerance specifies the maximum distance between the index and
indexer values:

.. ipython:: python
ts2.reindex(ts.index, method='ffill', tolerance='1 day')
Notice that when used on a ``DatetimeIndex``, ``TimedeltaIndex`` or
``PeriodIndex``, ``tolerance`` will be coerced into a ``Timedelta`` if possible.
This allows you to specify tolerance with appropriate strings.

.. _basics.drop:

Dropping labels from an axis
Expand Down
16 changes: 16 additions & 0 deletions doc/source/whatsnew/v0.17.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,22 @@ Other enhancements
s.drop_duplicates(keep=False)


- Reindex now has a ``tolerance`` argument that allows for finer control of :ref:`basics.limits_on_reindex_fill`:

.. ipython:: python

df = pd.DataFrame({'x': range(5), 't': pd.date_range('2000-01-01', periods=5)})
df.reindex([0.1, 1.9, 3.5], method='nearest', tolerance=0.2)

When used on a ``DatetimeIndex``, ``TimedeltaIndex`` or ``PeriodIndex``, ``tolerance`` will be coerced into a ``Timedelta`` if possible. This allows you to specify tolerance with a string:

.. ipython:: python

df = df.set_index('t')
df.reindex(pd.to_datetime(['1999-12-31']), method='nearest', tolerance='1 day')

``tolerance`` is also exposed by the lower level ``Index.get_indexer`` and ``Index.get_loc`` methods.

.. _whatsnew_0170.api:

.. _whatsnew_0170.api_breaking:
Expand Down
17 changes: 10 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2518,33 +2518,36 @@ def lookup(self, row_labels, col_labels):
#----------------------------------------------------------------------
# Reindexing and alignment

def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
def _reindex_axes(self, axes, level, limit, tolerance, method,
fill_value, copy):
frame = self

columns = axes['columns']
if columns is not None:
frame = frame._reindex_columns(columns, copy, level, fill_value,
limit)
limit, tolerance)

index = axes['index']
if index is not None:
frame = frame._reindex_index(index, method, copy, level,
fill_value, limit)
fill_value, limit, tolerance)

return frame

def _reindex_index(self, new_index, method, copy, level, fill_value=NA,
limit=None):
limit=None, tolerance=None):
new_index, indexer = self.index.reindex(new_index, method, level,
limit=limit)
limit=limit,
tolerance=tolerance)
return self._reindex_with_indexers({0: [new_index, indexer]},
copy=copy, fill_value=fill_value,
allow_dups=False)

def _reindex_columns(self, new_columns, copy, level, fill_value=NA,
limit=None):
limit=None, tolerance=None):
new_columns, indexer = self.columns.reindex(new_columns, level=level,
limit=limit)
limit=limit,
tolerance=tolerance)
return self._reindex_with_indexers({1: [new_columns, indexer]},
copy=copy, fill_value=fill_value,
allow_dups=False)
Expand Down
42 changes: 32 additions & 10 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -922,7 +922,7 @@ def to_hdf(self, path_or_buf, key, **kwargs):
in the store wherever possible
fletcher32 : bool, default False
If applying compression use the fletcher32 checksum
dropna : boolean, default False.
dropna : boolean, default False.
If true, ALL nan rows will not be written to store.
"""
Expand Down Expand Up @@ -1551,7 +1551,8 @@ def select(self, crit, axis=0):

return self.reindex(**{axis_name: new_axis})

def reindex_like(self, other, method=None, copy=True, limit=None):
def reindex_like(self, other, method=None, copy=True, limit=None,
tolerance=None):
""" return an object with matching indices to myself
Parameters
Expand All @@ -1560,7 +1561,12 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
method : string or None
copy : boolean, default True
limit : int, default None
Maximum size gap to forward or backward fill
Maximum number of consecutive labels to fill for inexact matches.
tolerance : optional
Maximum distance between labels of the other object and this
object for inexact matches.
.. versionadded:: 0.17.0
Notes
-----
Expand All @@ -1572,7 +1578,8 @@ def reindex_like(self, other, method=None, copy=True, limit=None):
reindexed : same as input
"""
d = other._construct_axes_dict(axes=self._AXIS_ORDERS,
method=method, copy=copy, limit=limit)
method=method, copy=copy, limit=limit,
tolerance=tolerance)

return self.reindex(**d)

Expand Down Expand Up @@ -1736,7 +1743,13 @@ def sort_index(self, axis=0, ascending=True):
Value to use for missing values. Defaults to NaN, but can be any
"compatible" value
limit : int, default None
Maximum size gap to forward or backward fill
Maximum number of consecutive elements to forward or backward fill
tolerance : optional
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
.. versionadded:: 0.17.0
Examples
--------
Expand All @@ -1758,6 +1771,7 @@ def reindex(self, *args, **kwargs):
level = kwargs.pop('level', None)
copy = kwargs.pop('copy', True)
limit = kwargs.pop('limit', None)
tolerance = kwargs.pop('tolerance', None)
fill_value = kwargs.pop('fill_value', np.nan)

if kwargs:
Expand All @@ -1782,10 +1796,11 @@ def reindex(self, *args, **kwargs):
pass

# perform the reindex on the axes
return self._reindex_axes(axes, level, limit,
return self._reindex_axes(axes, level, limit, tolerance,
method, fill_value, copy).__finalize__(self)

def _reindex_axes(self, axes, level, limit, method, fill_value, copy):
def _reindex_axes(self, axes, level, limit, tolerance, method,
fill_value, copy):
""" perform the reindex for all the axes """
obj = self
for a in self._AXIS_ORDERS:
Expand All @@ -1795,7 +1810,8 @@ def _reindex_axes(self, axes, level, limit, method, fill_value, copy):

ax = self._get_axis(a)
new_index, indexer = ax.reindex(
labels, level=level, limit=limit, method=method)
labels, level=level, limit=limit, tolerance=tolerance,
method=method)

axis = self._get_axis_number(a)
obj = obj._reindex_with_indexers(
Expand Down Expand Up @@ -1836,7 +1852,13 @@ def _reindex_multi(self, axes, copy, fill_value):
Broadcast across a level, matching Index values on the
passed MultiIndex level
limit : int, default None
Maximum size gap to forward or backward fill
Maximum number of consecutive elements to forward or backward fill
tolerance : optional
Maximum distance between original and new labels for inexact
matches. The values of the index at the matching locations must
satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
.. versionadded:: 0.17.0
Examples
--------
Expand Down Expand Up @@ -2910,7 +2932,7 @@ def interpolate(self, method='linear', axis=0, limit=None, inplace=False,
use the actual numerical values of the index
* 'krogh', 'piecewise_polynomial', 'spline', and 'pchip' are all
wrappers around the scipy interpolation methods of similar
names. These use the actual numerical values of the index. See
names. These use the actual numerical values of the index. See
the scipy documentation for more on their behavior:
http://docs.scipy.org/doc/scipy/reference/interpolate.html#univariate-interpolation
http://docs.scipy.org/doc/scipy/reference/tutorial/interpolate.html
Expand Down
Loading

0 comments on commit 5052900

Please sign in to comment.