REF: pandas/core/window.py into multiple files (#27736)
mroeschke authored and jreback committed Aug 7, 2019
1 parent 9724ace commit 3bf35c6
Showing 12 changed files with 979 additions and 955 deletions.
7 changes: 6 additions & 1 deletion doc/source/reference/window.rst
@@ -5,14 +5,15 @@
======
Window
======
- .. currentmodule:: pandas.core.window

Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.

Standard moving window functions
--------------------------------
+ .. currentmodule:: pandas.core.window.rolling

.. autosummary::
:toctree: api/

@@ -38,6 +39,8 @@ Standard moving window functions

Standard expanding window functions
-----------------------------------
+ .. currentmodule:: pandas.core.window.expanding

.. autosummary::
:toctree: api/

@@ -59,6 +62,8 @@ Standard expanding window functions

Exponentially-weighted moving window functions
----------------------------------------------
+ .. currentmodule:: pandas.core.window.ewm

.. autosummary::
:toctree: api/

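The ``currentmodule`` directives above now point at the new submodules (``rolling``, ``expanding``, ``ewm``); the surrounding prose and autosummary entries are unchanged, and the three entry points behave as before. A minimal usage sketch (illustrative only, not part of this commit):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])

r = s.rolling(window=2)   # Rolling object
e = s.expanding()         # Expanding object
w = s.ewm(span=3)         # EWM object

print(r.mean())   # moving mean over a 2-observation window
print(e.sum())    # cumulative (expanding) sum
print(w.mean())   # exponentially weighted mean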
29 changes: 20 additions & 9 deletions pandas/core/generic.py
@@ -10683,9 +10683,9 @@ def _add_series_or_dataframe_operations(cls):
the doc strings again.
"""

- from pandas.core import window as rwindow
+ from pandas.core.window import EWM, Expanding, Rolling, Window

- @Appender(rwindow.rolling.__doc__)
+ @Appender(Rolling.__doc__)
def rolling(
self,
window,
@@ -10697,7 +10697,20 @@ def rolling(
closed=None,
):
axis = self._get_axis_number(axis)
- return rwindow.rolling(
+
+ if win_type is not None:
+ return Window(
+ self,
+ window=window,
+ min_periods=min_periods,
+ center=center,
+ win_type=win_type,
+ on=on,
+ axis=axis,
+ closed=closed,
+ )
+
+ return Rolling(
self,
window=window,
min_periods=min_periods,
@@ -10710,16 +10723,14 @@ def rolling(

cls.rolling = rolling

- @Appender(rwindow.expanding.__doc__)
+ @Appender(Expanding.__doc__)
def expanding(self, min_periods=1, center=False, axis=0):
axis = self._get_axis_number(axis)
- return rwindow.expanding(
- self, min_periods=min_periods, center=center, axis=axis
- )
+ return Expanding(self, min_periods=min_periods, center=center, axis=axis)

cls.expanding = expanding

- @Appender(rwindow.ewm.__doc__)
+ @Appender(EWM.__doc__)
def ewm(
self,
com=None,
@@ -10732,7 +10743,7 @@ def ewm(
axis=0,
):
axis = self._get_axis_number(axis)
- return rwindow.ewm(
+ return EWM(
self,
com=com,
span=span,
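With this change, ``DataFrame.rolling``/``Series.rolling`` construct the window object directly: a ``Window`` when ``win_type`` is given, otherwise a ``Rolling``, instead of going through the removed ``rwindow.rolling`` factory. A small sketch of the dispatch (illustrative; weighted windows require scipy):

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})

plain = df.rolling(window=2)                        # no win_type -> Rolling
weighted = df.rolling(window=2, win_type="triang")  # win_type set -> Window (needs scipy)

print(type(plain).__name__)     # expected: "Rolling"
print(type(weighted).__name__)  # expected: "Window"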
3 changes: 3 additions & 0 deletions pandas/core/window/__init__.py
@@ -0,0 +1,3 @@
from pandas.core.window.ewm import EWM # noqa:F401
from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401
from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401
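The package ``__init__`` re-exports the classes, so code that imported them from the internal ``pandas.core.window`` module keeps working even though the implementations now live in submodules. A quick check of the new layout (illustrative):

from pandas.core.window import EWM, Expanding, Rolling, Window

print(Rolling.__module__)    # expected: "pandas.core.window.rolling"
print(Window.__module__)     # expected: "pandas.core.window.rolling"
print(Expanding.__module__)  # expected: "pandas.core.window.expanding"
print(EWM.__module__)        # expected: "pandas.core.window.ewm"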
276 changes: 276 additions & 0 deletions pandas/core/window/common.py
@@ -0,0 +1,276 @@
"""Common utility functions for rolling operations"""
from collections import defaultdict
import warnings

import numpy as np

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

import pandas.core.common as com
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.index import MultiIndex

_shared_docs = dict(**_shared_docs)
_doc_template = """
Returns
-------
Series or DataFrame
Return type is determined by the caller.
See Also
--------
Series.%(name)s : Series %(name)s.
DataFrame.%(name)s : DataFrame %(name)s.
"""


class _GroupByMixin(GroupByMixin):
"""
Provide the groupby facilities.
"""

def __init__(self, obj, *args, **kwargs):
parent = kwargs.pop("parent", None) # noqa
groupby = kwargs.pop("groupby", None)
if groupby is None:
groupby, obj = obj, obj.obj
self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
super().__init__(obj, *args, **kwargs)

count = GroupByMixin._dispatch("count")
corr = GroupByMixin._dispatch("corr", other=None, pairwise=None)
cov = GroupByMixin._dispatch("cov", other=None, pairwise=None)

def _apply(
self, func, name=None, window=None, center=None, check_minp=None, **kwargs
):
"""
Dispatch to apply; we are stripping all of the _apply kwargs and
performing the original function call on the grouped object.
"""

def f(x, name=name, *args):
x = self._shallow_copy(x)

if isinstance(name, str):
return getattr(x, name)(*args, **kwargs)

return x.apply(name, *args, **kwargs)

return self._groupby.apply(f)


def _flex_binary_moment(arg1, arg2, f, pairwise=False):

if not (
isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame))
and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))
):
raise TypeError(
"arguments to moment function must be of type "
"np.ndarray/Series/DataFrame"
)

if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance(
arg2, (np.ndarray, ABCSeries)
):
X, Y = _prep_binary(arg1, arg2)
return f(X, Y)

elif isinstance(arg1, ABCDataFrame):
from pandas import DataFrame

def dataframe_from_int_dict(data, frame_template):
result = DataFrame(data, index=frame_template.index)
if len(result.columns) > 0:
result.columns = frame_template.columns[result.columns]
return result

results = {}
if isinstance(arg2, ABCDataFrame):
if pairwise is False:
if arg1 is arg2:
# special case in order to handle duplicate column names
for i, col in enumerate(arg1.columns):
results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
return dataframe_from_int_dict(results, arg1)
else:
if not arg1.columns.is_unique:
raise ValueError("'arg1' columns are not unique")
if not arg2.columns.is_unique:
raise ValueError("'arg2' columns are not unique")
with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", RuntimeWarning)
X, Y = arg1.align(arg2, join="outer")
X = X + 0 * Y
Y = Y + 0 * X

with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", RuntimeWarning)
res_columns = arg1.columns.union(arg2.columns)
for col in res_columns:
if col in X and col in Y:
results[col] = f(X[col], Y[col])
return DataFrame(results, index=X.index, columns=res_columns)
elif pairwise is True:
results = defaultdict(dict)
for i, k1 in enumerate(arg1.columns):
for j, k2 in enumerate(arg2.columns):
if j < i and arg2 is arg1:
# Symmetric case
results[i][j] = results[j][i]
else:
results[i][j] = f(
*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
)

from pandas import concat

result_index = arg1.index.union(arg2.index)
if len(result_index):

# construct result frame
result = concat(
[
concat(
[results[i][j] for j, c in enumerate(arg2.columns)],
ignore_index=True,
)
for i, c in enumerate(arg1.columns)
],
ignore_index=True,
axis=1,
)
result.columns = arg1.columns

# set the index and reorder
if arg2.columns.nlevels > 1:
result.index = MultiIndex.from_product(
arg2.columns.levels + [result_index]
)
result = result.reorder_levels([2, 0, 1]).sort_index()
else:
result.index = MultiIndex.from_product(
[range(len(arg2.columns)), range(len(result_index))]
)
result = result.swaplevel(1, 0).sort_index()
result.index = MultiIndex.from_product(
[result_index] + [arg2.columns]
)
else:

# empty result
result = DataFrame(
index=MultiIndex(
levels=[arg1.index, arg2.columns], codes=[[], []]
),
columns=arg2.columns,
dtype="float64",
)

# reset our index names to arg1 names
# reset our column names to arg2 names
# careful not to mutate the original names
result.columns = result.columns.set_names(arg1.columns.names)
result.index = result.index.set_names(
result_index.names + arg2.columns.names
)

return result

else:
raise ValueError("'pairwise' is not True/False")
else:
results = {
i: f(*_prep_binary(arg1.iloc[:, i], arg2))
for i, col in enumerate(arg1.columns)
}
return dataframe_from_int_dict(results, arg1)

else:
return _flex_binary_moment(arg2, arg1, f)


def _get_center_of_mass(comass, span, halflife, alpha):
valid_count = com.count_not_none(comass, span, halflife, alpha)
if valid_count > 1:
raise ValueError("comass, span, halflife, and alpha are mutually exclusive")

# Convert to center of mass; domain checks ensure 0 < alpha <= 1
if comass is not None:
if comass < 0:
raise ValueError("comass must satisfy: comass >= 0")
elif span is not None:
if span < 1:
raise ValueError("span must satisfy: span >= 1")
comass = (span - 1) / 2.0
elif halflife is not None:
if halflife <= 0:
raise ValueError("halflife must satisfy: halflife > 0")
decay = 1 - np.exp(np.log(0.5) / halflife)
comass = 1 / decay - 1
elif alpha is not None:
if alpha <= 0 or alpha > 1:
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
comass = (1.0 - alpha) / alpha
else:
raise ValueError("Must pass one of comass, span, halflife, or alpha")

return float(comass)
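
# Illustrative note (not in the pandas source): the alternative EWM
# parameterizations above all reduce to a single center of mass, e.g.
#   span=9    -> comass = (9 - 1) / 2            = 4.0
#   alpha=0.2 -> comass = (1 - 0.2) / 0.2        = 4.0
#   halflife  -> comass = 1 / (1 - exp(log(0.5) / halflife)) - 1
# so ewm(span=9) and ewm(alpha=0.2) describe the same exponential weighting.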


def _offset(window, center):
if not is_integer(window):
window = len(window)
offset = (window - 1) / 2.0 if center else 0
try:
return int(offset)
except TypeError:
return offset.astype(int)


def _require_min_periods(p):
def _check_func(minp, window):
if minp is None:
return window
else:
return max(p, minp)

return _check_func


def _use_window(minp, window):
if minp is None:
return window
else:
return minp


def _zsqrt(x):
with np.errstate(all="ignore"):
result = np.sqrt(x)
mask = x < 0

if isinstance(x, ABCDataFrame):
if mask.values.any():
result[mask] = 0
else:
if mask.any():
result[mask] = 0

return result


def _prep_binary(arg1, arg2):
if not isinstance(arg2, type(arg1)):
raise Exception("Input arrays must be of the same type!")

# mask out values, this also makes a common index...
X = arg1 + 0 * arg2
Y = arg2 + 0 * arg1

return X, Y
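``_prep_binary`` aligns two Series onto a common index by adding ``0 * other``, which also propagates NaN wherever either input is missing. A standalone sketch of that trick (illustrative, independent of the pandas internals above):

import pandas as pd

a = pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"])
b = pd.Series([10.0, 20.0], index=["y", "z"])

# Adding 0 * the other operand aligns both Series on the union of their
# indexes and leaves NaN where either side has no value.
X = a + 0 * b   # x -> NaN, y -> 2.0,  z -> 3.0
Y = b + 0 * a   # x -> NaN, y -> 10.0, z -> 20.0
print(X)
print(Y)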