Skip to content

Commit

Permalink
CLN/COMPAT: IntervalIndex
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 13, 2017
1 parent 74162aa commit 340c98b
Show file tree
Hide file tree
Showing 52 changed files with 3,211 additions and 3,065 deletions.
20 changes: 20 additions & 0 deletions asv_bench/benchmarks/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,26 @@ def time_is_monotonic(self):
self.miint.is_monotonic


class IntervalIndexing(object):
    """Benchmarks for lookups on a Series indexed by an IntervalIndex.

    ``setup`` builds a Series holding the integers ``0 .. 999999`` whose
    index is the IntervalIndex formed from the integer breaks
    ``0 .. 1000000``; each ``time_*`` method performs one lookup on it.
    """

    goal_time = 0.2

    def setup(self):
        size = 1000000
        # One more break than values: the breaks delimit ``size`` intervals.
        intervals = IntervalIndex.from_breaks(np.arange(size + 1))
        self.monotonic = Series(np.arange(size), index=intervals)

    def time_getitem_scalar(self):
        # Scalar key through ``[]``.
        self.monotonic[80000]

    def time_loc_scalar(self):
        # Scalar key through ``.loc``.
        self.monotonic.loc[80000]

    def time_getitem_list(self):
        # Despite the "list" suffix, the key is an open-ended slice.
        self.monotonic[80000:]

    def time_loc_list(self):
        # Open-ended slice through ``.loc``.
        self.monotonic.loc[80000:]


class PanelIndexing(object):
goal_time = 0.2

Expand Down
21 changes: 21 additions & 0 deletions doc/source/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1405,6 +1405,27 @@ Categorical Components
CategoricalIndex.as_ordered
CategoricalIndex.as_unordered

.. _api.intervalindex:

IntervalIndex
-------------

.. autosummary::
:toctree: generated/

IntervalIndex

IntervalIndex Components
~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
:toctree: generated/

IntervalIndex.from_arrays
IntervalIndex.from_tuples
IntervalIndex.from_breaks
IntervalIndex.from_intervals

.. _api.multiindex:

MultiIndex
Expand Down
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Highlights include:
- ``Panel`` has been deprecated, see :ref:`here <whatsnew_0200.api_breaking.deprecate_panel>`
- Improved user API when accessing levels in ``.groupby()``, see :ref:`here <whatsnew_0200.enhancements.groupby_access>`
- Improved support for UInt64 dtypes, see :ref:`here <whatsnew_0200.enhancements.uint64_support>`
- Addition of an ``IntervalIndex`` and ``Interval`` scalar type, see :ref:`here <whatsnew_0200.enhancements.intervalindex>`
- A new orient for JSON serialization, ``orient='table'``, that uses the Table Schema spec, see :ref:`here <whatsnew_0200.enhancements.table_schema>`
- Window Binary Corr/Cov operations return a MultiIndexed ``DataFrame`` rather than a ``Panel``, as ``Panel`` is now deprecated, see :ref:`here <whatsnew_0200.api_breaking.rolling_pairwise>`
- Support for S3 handling now uses ``s3fs``, see :ref:`here <whatsnew_0200.api_breaking.s3>`
Expand Down Expand Up @@ -314,6 +315,36 @@ To convert a ``SparseDataFrame`` back to sparse SciPy matrix in COO format, you

sdf.to_coo()

.. _whatsnew_0200.enhancements.intervalindex:

IntervalIndex
^^^^^^^^^^^^^

pandas has gained an ``IntervalIndex`` with its own dtype, ``interval``, as well as the ``Interval`` scalar type. These allow first-class support for interval
notation, specifically as a return type for the categories in ``pd.cut`` and ``pd.qcut``. (:issue:`7640`, :issue:`8625`)

**Previous behavior**:

.. code-block:: ipython

In [2]: pd.cut(range(3), 2)
Out[2]:
[(-0.002, 1], (-0.002, 1], (1, 2]]
Categories (2, object): [(-0.002, 1] < (1, 2]]

# the returned categories are strings, representing Intervals
In [3]: pd.cut(range(3), 2).categories
Out[3]: Index(['(-0.002, 1]', '(1, 2]'], dtype='object')

**New behavior**:

.. ipython:: python

c = pd.cut(range(3), 2)
c
c.categories
pd.api.types.is_interval_dtype(c.categories)

.. _whatsnew_0200.enhancements.other:

Other Enhancements
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/hashtable.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ PyDateTime_IMPORT
cdef extern from "Python.h":
int PySlice_Check(object)

cdef size_t _INIT_VEC_CAP = 128

include "hashtable_class_helper.pxi"
include "hashtable_func_helper.pxi"

Expand Down
104 changes: 74 additions & 30 deletions pandas/src/interval.pyx → pandas/_libs/interval.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@ cimport numpy as np
import numpy as np
import pandas as pd

cimport util
cimport cython
import cython
from numpy cimport *
from tslib import Timestamp

from cpython.object cimport (Py_EQ, Py_NE, Py_GT, Py_LT, Py_GE, Py_LE,
PyObject_RichCompare)
Expand Down Expand Up @@ -44,6 +47,20 @@ cdef _interval_like(other):


cdef class Interval(IntervalMixin):
"""
Immutable object implementing an Interval, a bounded slice-like interval.
.. versionadded:: 0.20.0
Properties
----------
left, right : values
Left and right bounds for each interval.
closed : {'left', 'right', 'both', 'neither'}
Whether the interval is closed on the left-side, right-side, both or
neither. Defaults to 'right'.
"""

cdef readonly object left, right
cdef readonly str closed

Expand Down Expand Up @@ -84,88 +101,115 @@ cdef class Interval(IntervalMixin):
return NotImplemented
else:
op_str = {Py_LT: '<', Py_LE: '<=', Py_GT: '>', Py_GE: '>='}[op]
raise TypeError('unorderable types: %s() %s %s()' %
(type(self).__name__, op_str, type(other).__name__))
raise TypeError(
'unorderable types: %s() %s %s()' %
(type(self).__name__, op_str, type(other).__name__))

def __reduce__(self):
    """Pickle support: return ``(callable, args)`` used to rebuild the object.

    The callable is ``type(self)`` and the arguments are the ``left``,
    ``right`` and ``closed`` attributes, in that order.
    """
    return type(self), (self.left, self.right, self.closed)

def _repr_base(self):
    """Return the ``(left, right)`` pair to use when rendering the bounds.

    When both bounds are ``Timestamp`` objects their ``_short_repr`` is
    returned instead of the raw values; otherwise the bounds are returned
    unchanged.
    """
    bounds = (self.left, self.right)

    # TODO: need more general formatting methodology here
    if all(isinstance(bound, Timestamp) for bound in bounds):
        return tuple(bound._short_repr for bound in bounds)

    return bounds

def __repr__(self):
    """Return ``ClassName(left, right, closed=...)``.

    The bounds come from ``_repr_base()`` rather than the raw attributes.
    Only the bound-aware tuple is kept: a second, stale argument tuple
    built from ``self.left`` / ``self.right`` made the expression
    malformed.
    """
    left, right = self._repr_base()
    return ('%s(%r, %r, closed=%r)' %
            (type(self).__name__, left, right, self.closed))

def __str__(self):
    """Return the interval in bracket notation, e.g. ``(0, 1]``.

    A square bracket marks a closed side and a parenthesis an open side,
    chosen from ``self.closed_left`` / ``self.closed_right``. The bounds
    shown are those returned by ``_repr_base()``; an earlier ``return``
    that formatted the raw ``self.left`` / ``self.right`` made that
    result dead code.
    """
    left, right = self._repr_base()
    start_symbol = '[' if self.closed_left else '('
    end_symbol = ']' if self.closed_right else ')'
    return '%s%s, %s%s' % (start_symbol, left, right, end_symbol)

def __add__(self, y):
    """Shift both bounds by a number, keeping the ``closed`` side.

    Handles ``interval + number`` and the swapped ``number + interval``
    form, where ``self`` is the number and ``y`` the Interval.
    NOTE(review): presumably Cython passes operands in either order to
    this method; confirm against the Cython version in use.

    Returns ``NotImplemented`` for any other operand so Python can try
    the other operand or raise ``TypeError`` itself. ``NotImplemented`` is
    not an exception, so it must be returned, never raised.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left + y, self.right + y, self.closed)
    elif isinstance(y, Interval) and isinstance(self, numbers.Number):
        return Interval(y.left + self, y.right + self, y.closed)
    return NotImplemented

def __sub__(self, y):
    """Shift both bounds down by a number, keeping the ``closed`` side.

    Returns ``NotImplemented`` for any non-numeric operand so Python can
    try the other operand or raise ``TypeError`` itself. ``NotImplemented``
    is not an exception, so it must be returned, never raised.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left - y, self.right - y, self.closed)
    return NotImplemented

def __mul__(self, y):
    """Scale both bounds by a number, keeping the ``closed`` side.

    Handles ``interval * number`` and the swapped ``number * interval``
    form, where ``self`` is the number and ``y`` the Interval.
    NOTE(review): presumably Cython passes operands in either order to
    this method; confirm against the Cython version in use.

    Returns ``NotImplemented`` for any other operand.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left * y, self.right * y, self.closed)
    elif isinstance(y, Interval) and isinstance(self, numbers.Number):
        return Interval(y.left * self, y.right * self, y.closed)
    return NotImplemented

def __div__(self, y):
    """Divide both bounds by a number, keeping the ``closed`` side.

    This is the Python 2 ``/`` hook. Returns ``NotImplemented`` for any
    non-numeric operand.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left / y, self.right / y, self.closed)
    return NotImplemented

def __truediv__(self, y):
    """Divide both bounds by a number, keeping the ``closed`` side.

    Returns ``NotImplemented`` for any non-numeric operand.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left / y, self.right / y, self.closed)
    return NotImplemented

def __floordiv__(self, y):
    """Floor-divide both bounds by a number, keeping the ``closed`` side.

    Returns ``NotImplemented`` for any non-numeric operand.
    """
    if isinstance(y, numbers.Number):
        # Propagate ``closed``; otherwise the result silently falls back
        # to the constructor's default.
        return Interval(self.left // y, self.right // y, self.closed)
    return NotImplemented


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef interval_bounds_to_intervals(np.ndarray left, np.ndarray right,
                                   str closed):
    """
    Build an object array of Intervals from arrays of bounds.

    Parameters
    ----------
    left, right : ndarray
        Left and right bounds. Positions where either bound is null
        (per ``pd.isnull``) are set to NaN in the result.
    closed : str
        Passed unchanged to every ``Interval`` that is constructed.

    Returns
    -------
    ndarray object array of Intervals / NaN
    """
    result = np.empty(len(left), dtype=object)
    nulls = pd.isnull(left) | pd.isnull(right)
    result[nulls] = np.nan
    # only build Interval objects where both bounds are present
    for i in np.flatnonzero(~nulls):
        result[i] = Interval(left[i], right[i], closed)
    return result


@cython.wraparound(False)
@cython.boundscheck(False)
cpdef intervals_to_interval_bounds(ndarray intervals):
    """
    Split an array of Intervals into its left bounds, right bounds and
    common ``closed`` value.

    Parameters
    ----------
    intervals: ndarray object array of Intervals / nulls

    Returns
    -------
    tuples (left: ndarray object array,
            right: ndarray object array,
            closed: str)
        ``closed`` is None when no Interval is encountered.

    Raises
    ------
    TypeError
        If an element is neither null nor an Interval.
    ValueError
        If the Intervals are not all closed on the same side.
    """

    cdef:
        object closed = None, interval
        int64_t n = len(intervals)
        ndarray left, right

    left = np.empty(n, dtype=object)
    right = np.empty(n, dtype=object)

    for i in range(n):
        interval = intervals[i]
        if util._checknull(interval):
            # nulls map to NaN bounds and do not constrain ``closed``
            left[i] = np.nan
            right[i] = np.nan
            continue

        if not isinstance(interval, Interval):
            raise TypeError("type {} with value {} is not an interval".format(
                type(interval), interval))

        left[i] = interval.left
        right[i] = interval.right
        # the first Interval seen fixes ``closed``; later ones must agree
        if closed is None:
            closed = interval.closed
        elif closed != interval.closed:
            raise ValueError('intervals must all be closed on the same side')

    return left, right, closed

include "intervaltree.pxi"
Loading

0 comments on commit 340c98b

Please sign in to comment.