diff --git a/doc/source/reference/window.rst b/doc/source/reference/window.rst index 9e1374a3bd8e4..2f6addf607877 100644 --- a/doc/source/reference/window.rst +++ b/doc/source/reference/window.rst @@ -5,7 +5,6 @@ ====== Window ====== -.. currentmodule:: pandas.core.window Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc. Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc. @@ -13,6 +12,8 @@ EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func: Standard moving window functions -------------------------------- +.. currentmodule:: pandas.core.window.rolling + .. autosummary:: :toctree: api/ @@ -38,6 +39,8 @@ Standard moving window functions Standard expanding window functions ----------------------------------- +.. currentmodule:: pandas.core.window.expanding + .. autosummary:: :toctree: api/ @@ -59,6 +62,8 @@ Standard expanding window functions Exponentially-weighted moving window functions ---------------------------------------------- +.. currentmodule:: pandas.core.window.ewm + .. autosummary:: :toctree: api/ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1b39f9225a0ed..4d29f19cc01ed 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10683,9 +10683,9 @@ def _add_series_or_dataframe_operations(cls): the doc strings again. """ - from pandas.core import window as rwindow + from pandas.core.window import EWM, Expanding, Rolling, Window - @Appender(rwindow.rolling.__doc__) + @Appender(Rolling.__doc__) def rolling( self, window, @@ -10697,7 +10697,20 @@ def rolling( closed=None, ): axis = self._get_axis_number(axis) - return rwindow.rolling( + + if win_type is not None: + return Window( + self, + window=window, + min_periods=min_periods, + center=center, + win_type=win_type, + on=on, + axis=axis, + closed=closed, + ) + + return Rolling( self, window=window, min_periods=min_periods, @@ -10710,16 +10723,14 @@ def rolling( cls.rolling = rolling - @Appender(rwindow.expanding.__doc__) + @Appender(Expanding.__doc__) def expanding(self, min_periods=1, center=False, axis=0): axis = self._get_axis_number(axis) - return rwindow.expanding( - self, min_periods=min_periods, center=center, axis=axis - ) + return Expanding(self, min_periods=min_periods, center=center, axis=axis) cls.expanding = expanding - @Appender(rwindow.ewm.__doc__) + @Appender(EWM.__doc__) def ewm( self, com=None, @@ -10732,7 +10743,7 @@ def ewm( axis=0, ): axis = self._get_axis_number(axis) - return rwindow.ewm( + return EWM( self, com=com, span=span, diff --git a/pandas/core/window/__init__.py b/pandas/core/window/__init__.py new file mode 100644 index 0000000000000..dcf58a4c0dd5b --- /dev/null +++ b/pandas/core/window/__init__.py @@ -0,0 +1,3 @@ +from pandas.core.window.ewm import EWM # noqa:F401 +from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401 +from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401 diff --git a/pandas/core/window/common.py b/pandas/core/window/common.py new file mode 100644 index 0000000000000..0f2920b3558c9 --- /dev/null +++ b/pandas/core/window/common.py @@ -0,0 +1,276 @@ +"""Common utility functions for rolling operations""" +from collections import defaultdict +import warnings + +import numpy as np + +from pandas.core.dtypes.common import is_integer +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries + +import pandas.core.common as com +from pandas.core.generic import _shared_docs +from pandas.core.groupby.base import GroupByMixin +from pandas.core.index import MultiIndex + +_shared_docs = dict(**_shared_docs) +_doc_template = """ + Returns + ------- + Series or DataFrame + Return type is determined by the caller. + + See Also + -------- + Series.%(name)s : Series %(name)s. + DataFrame.%(name)s : DataFrame %(name)s. +""" + + +class _GroupByMixin(GroupByMixin): + """ + Provide the groupby facilities. + """ + + def __init__(self, obj, *args, **kwargs): + parent = kwargs.pop("parent", None) # noqa + groupby = kwargs.pop("groupby", None) + if groupby is None: + groupby, obj = obj, obj.obj + self._groupby = groupby + self._groupby.mutated = True + self._groupby.grouper.mutated = True + super().__init__(obj, *args, **kwargs) + + count = GroupByMixin._dispatch("count") + corr = GroupByMixin._dispatch("corr", other=None, pairwise=None) + cov = GroupByMixin._dispatch("cov", other=None, pairwise=None) + + def _apply( + self, func, name=None, window=None, center=None, check_minp=None, **kwargs + ): + """ + Dispatch to apply; we are stripping all of the _apply kwargs and + performing the original function call on the grouped object. + """ + + def f(x, name=name, *args): + x = self._shallow_copy(x) + + if isinstance(name, str): + return getattr(x, name)(*args, **kwargs) + + return x.apply(name, *args, **kwargs) + + return self._groupby.apply(f) + + +def _flex_binary_moment(arg1, arg2, f, pairwise=False): + + if not ( + isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) + and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame)) + ): + raise TypeError( + "arguments to moment function must be of type " + "np.ndarray/Series/DataFrame" + ) + + if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance( + arg2, (np.ndarray, ABCSeries) + ): + X, Y = _prep_binary(arg1, arg2) + return f(X, Y) + + elif isinstance(arg1, ABCDataFrame): + from pandas import DataFrame + + def dataframe_from_int_dict(data, frame_template): + result = DataFrame(data, index=frame_template.index) + if len(result.columns) > 0: + result.columns = frame_template.columns[result.columns] + return result + + results = {} + if isinstance(arg2, ABCDataFrame): + if pairwise is False: + if arg1 is arg2: + # special case in order to handle duplicate column names + for i, col in enumerate(arg1.columns): + results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) + return dataframe_from_int_dict(results, arg1) + else: + if not arg1.columns.is_unique: + raise ValueError("'arg1' columns are not unique") + if not arg2.columns.is_unique: + raise ValueError("'arg2' columns are not unique") + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + X, Y = arg1.align(arg2, join="outer") + X = X + 0 * Y + Y = Y + 0 * X + + with warnings.catch_warnings(record=True): + warnings.simplefilter("ignore", RuntimeWarning) + res_columns = arg1.columns.union(arg2.columns) + for col in res_columns: + if col in X and col in Y: + results[col] = f(X[col], Y[col]) + return DataFrame(results, index=X.index, columns=res_columns) + elif pairwise is True: + results = defaultdict(dict) + for i, k1 in enumerate(arg1.columns): + for j, k2 in enumerate(arg2.columns): + if j < i and arg2 is arg1: + # Symmetric case + results[i][j] = results[j][i] + else: + results[i][j] = f( + *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) + ) + + from pandas import concat + + result_index = arg1.index.union(arg2.index) + if len(result_index): + + # construct result frame + result = concat( + [ + concat( + [results[i][j] for j, c in enumerate(arg2.columns)], + ignore_index=True, + ) + for i, c in enumerate(arg1.columns) + ], + ignore_index=True, + axis=1, + ) + result.columns = arg1.columns + + # set the index and reorder + if arg2.columns.nlevels > 1: + result.index = MultiIndex.from_product( + arg2.columns.levels + [result_index] + ) + result = result.reorder_levels([2, 0, 1]).sort_index() + else: + result.index = MultiIndex.from_product( + [range(len(arg2.columns)), range(len(result_index))] + ) + result = result.swaplevel(1, 0).sort_index() + result.index = MultiIndex.from_product( + [result_index] + [arg2.columns] + ) + else: + + # empty result + result = DataFrame( + index=MultiIndex( + levels=[arg1.index, arg2.columns], codes=[[], []] + ), + columns=arg2.columns, + dtype="float64", + ) + + # reset our index names to arg1 names + # reset our column names to arg2 names + # careful not to mutate the original names + result.columns = result.columns.set_names(arg1.columns.names) + result.index = result.index.set_names( + result_index.names + arg2.columns.names + ) + + return result + + else: + raise ValueError("'pairwise' is not True/False") + else: + results = { + i: f(*_prep_binary(arg1.iloc[:, i], arg2)) + for i, col in enumerate(arg1.columns) + } + return dataframe_from_int_dict(results, arg1) + + else: + return _flex_binary_moment(arg2, arg1, f) + + +def _get_center_of_mass(comass, span, halflife, alpha): + valid_count = com.count_not_none(comass, span, halflife, alpha) + if valid_count > 1: + raise ValueError("comass, span, halflife, and alpha are mutually exclusive") + + # Convert to center of mass; domain checks ensure 0 < alpha <= 1 + if comass is not None: + if comass < 0: + raise ValueError("comass must satisfy: comass >= 0") + elif span is not None: + if span < 1: + raise ValueError("span must satisfy: span >= 1") + comass = (span - 1) / 2.0 + elif halflife is not None: + if halflife <= 0: + raise ValueError("halflife must satisfy: halflife > 0") + decay = 1 - np.exp(np.log(0.5) / halflife) + comass = 1 / decay - 1 + elif alpha is not None: + if alpha <= 0 or alpha > 1: + raise ValueError("alpha must satisfy: 0 < alpha <= 1") + comass = (1.0 - alpha) / alpha + else: + raise ValueError("Must pass one of comass, span, halflife, or alpha") + + return float(comass) + + +def _offset(window, center): + if not is_integer(window): + window = len(window) + offset = (window - 1) / 2.0 if center else 0 + try: + return int(offset) + except TypeError: + return offset.astype(int) + + +def _require_min_periods(p): + def _check_func(minp, window): + if minp is None: + return window + else: + return max(p, minp) + + return _check_func + + +def _use_window(minp, window): + if minp is None: + return window + else: + return minp + + +def _zsqrt(x): + with np.errstate(all="ignore"): + result = np.sqrt(x) + mask = x < 0 + + if isinstance(x, ABCDataFrame): + if mask.values.any(): + result[mask] = 0 + else: + if mask.any(): + result[mask] = 0 + + return result + + +def _prep_binary(arg1, arg2): + if not isinstance(arg2, type(arg1)): + raise Exception("Input arrays must be of the same type!") + + # mask out values, this also makes a common index... + X = arg1 + 0 * arg2 + Y = arg2 + 0 * arg1 + + return X, Y diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py new file mode 100644 index 0000000000000..0ce6d5ddec2ad --- /dev/null +++ b/pandas/core/window/ewm.py @@ -0,0 +1,388 @@ +from textwrap import dedent + +import numpy as np + +import pandas._libs.window as libwindow +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution + +from pandas.core.dtypes.generic import ABCDataFrame + +from pandas.core.base import DataError +from pandas.core.window.common import _doc_template, _get_center_of_mass, _shared_docs +from pandas.core.window.rolling import _flex_binary_moment, _Rolling, _zsqrt + +_bias_template = """ + Parameters + ---------- + bias : bool, default False + Use a standard estimation bias correction. + *args, **kwargs + Arguments and keyword arguments to be passed into func. +""" + +_pairwise_template = """ + Parameters + ---------- + other : Series, DataFrame, or ndarray, optional + If not supplied then will default to self and produce pairwise + output. + pairwise : bool, default None + If False then only matching columns between self and other will be + used and the output will be a DataFrame. + If True then all pairwise combinations will be calculated and the + output will be a MultiIndex DataFrame in the case of DataFrame + inputs. In the case of missing elements, only complete pairwise + observations will be used. + bias : bool, default False + Use a standard estimation bias correction. + **kwargs + Keyword arguments to be passed into func. +""" + + +class EWM(_Rolling): + r""" + Provide exponential weighted functions. + + Parameters + ---------- + com : float, optional + Specify decay in terms of center of mass, + :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. + span : float, optional + Specify decay in terms of span, + :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. + halflife : float, optional + Specify decay in terms of half-life, + :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{for} halflife > 0`. + alpha : float, optional + Specify smoothing factor :math:`\alpha` directly, + :math:`0 < \alpha \leq 1`. + min_periods : int, default 0 + Minimum number of observations in window required to have a value + (otherwise result is NA). + adjust : bool, default True + Divide by decaying adjustment factor in beginning periods to account + for imbalance in relative weightings + (viewing EWMA as a moving average). + ignore_na : bool, default False + Ignore missing values when calculating weights; + specify True to reproduce pre-0.15.0 behavior. + axis : {0 or 'index', 1 or 'columns'}, default 0 + The axis to use. The value 0 identifies the rows, and 1 + identifies the columns. + + Returns + ------- + DataFrame + A Window sub-classed for the particular operation. + + See Also + -------- + rolling : Provides rolling window calculations. + expanding : Provides expanding transformations. + + Notes + ----- + Exactly one of center of mass, span, half-life, and alpha must be provided. + Allowed values and relationship between the parameters are specified in the + parameter descriptions above; see the link at the end of this section for + a detailed explanation. + + When adjust is True (default), weighted averages are calculated using + weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. + + When adjust is False, weighted averages are calculated recursively as: + weighted_average[0] = arg[0]; + weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. + + When ignore_na is False (default), weights are based on absolute positions. + For example, the weights of x and y used in calculating the final weighted + average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and + (1-alpha)**2 and alpha (if adjust is False). + + When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based + on relative positions. For example, the weights of x and y used in + calculating the final weighted average of [x, None, y] are 1-alpha and 1 + (if adjust is True), and 1-alpha and alpha (if adjust is False). + + More details can be found at + http://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows + + Examples + -------- + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + >>> df + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.ewm(com=0.5).mean() + B + 0 0.000000 + 1 0.750000 + 2 1.615385 + 3 1.615385 + 4 3.670213 + """ + _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] + + def __init__( + self, + obj, + com=None, + span=None, + halflife=None, + alpha=None, + min_periods=0, + adjust=True, + ignore_na=False, + axis=0, + ): + self.obj = obj + self.com = _get_center_of_mass(com, span, halflife, alpha) + self.min_periods = min_periods + self.adjust = adjust + self.ignore_na = ignore_na + self.axis = axis + self.on = None + + @property + def _constructor(self): + return EWM + + _agg_see_also_doc = dedent( + """ + See Also + -------- + pandas.DataFrame.rolling.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/Dataframe", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, arg, *args, **kwargs): + return super().aggregate(arg, *args, **kwargs) + + agg = aggregate + + def _apply(self, func, **kwargs): + """ + Rolling statistical measure using supplied function. Designed to be + used with passed-in Cython array-based functions. + + Parameters + ---------- + func : str/callable to apply + + Returns + ------- + y : same type as input argument + """ + blocks, obj = self._create_blocks() + block_list = list(blocks) + + results = [] + exclude = [] + for i, b in enumerate(blocks): + try: + values = self._prep_values(b.values) + + except (TypeError, NotImplementedError): + if isinstance(obj, ABCDataFrame): + exclude.extend(b.columns) + del block_list[i] + continue + else: + raise DataError("No numeric types to aggregate") + + if values.size == 0: + results.append(values.copy()) + continue + + # if we have a string function name, wrap it + if isinstance(func, str): + cfunc = getattr(libwindow, func, None) + if cfunc is None: + raise ValueError( + "we do not support this function " + "in libwindow.{func}".format(func=func) + ) + + def func(arg): + return cfunc( + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + ) + + results.append(np.apply_along_axis(func, self.axis, values)) + + return self._wrap_results(results, block_list, obj, exclude) + + @Substitution(name="ewm") + @Appender(_doc_template) + def mean(self, *args, **kwargs): + """ + Exponential weighted moving average. + + Parameters + ---------- + *args, **kwargs + Arguments and keyword arguments to be passed into func. + """ + nv.validate_window_func("mean", args, kwargs) + return self._apply("ewma", **kwargs) + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_bias_template) + def std(self, bias=False, *args, **kwargs): + """ + Exponential weighted moving stddev. + """ + nv.validate_window_func("std", args, kwargs) + return _zsqrt(self.var(bias=bias, **kwargs)) + + vol = std + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_bias_template) + def var(self, bias=False, *args, **kwargs): + """ + Exponential weighted moving variance. + """ + nv.validate_window_func("var", args, kwargs) + + def f(arg): + return libwindow.ewmcov( + arg, + arg, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias), + ) + + return self._apply(f, **kwargs) + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_pairwise_template) + def cov(self, other=None, pairwise=None, bias=False, **kwargs): + """ + Exponential weighted sample covariance. + """ + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + + def _get_cov(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + cov = libwindow.ewmcov( + X._prep_values(), + Y._prep_values(), + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + int(bias), + ) + return X._wrap_result(cov) + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) + ) + + @Substitution(name="ewm") + @Appender(_doc_template) + @Appender(_pairwise_template) + def corr(self, other=None, pairwise=None, **kwargs): + """ + Exponential weighted sample correlation. + """ + if other is None: + other = self._selected_obj + # only default unset + pairwise = True if pairwise is None else pairwise + other = self._shallow_copy(other) + + def _get_corr(X, Y): + X = self._shallow_copy(X) + Y = self._shallow_copy(Y) + + def _cov(x, y): + return libwindow.ewmcov( + x, + y, + self.com, + int(self.adjust), + int(self.ignore_na), + int(self.min_periods), + 1, + ) + + x_values = X._prep_values() + y_values = Y._prep_values() + with np.errstate(all="ignore"): + cov = _cov(x_values, y_values) + x_var = _cov(x_values, x_values) + y_var = _cov(y_values, y_values) + corr = cov / _zsqrt(x_var * y_var) + return X._wrap_result(corr) + + return _flex_binary_moment( + self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) + ) diff --git a/pandas/core/window/expanding.py b/pandas/core/window/expanding.py new file mode 100644 index 0000000000000..c43ca6b0565f3 --- /dev/null +++ b/pandas/core/window/expanding.py @@ -0,0 +1,260 @@ +from textwrap import dedent + +from pandas.compat.numpy import function as nv +from pandas.util._decorators import Appender, Substitution + +from pandas.core.window.common import _doc_template, _GroupByMixin, _shared_docs +from pandas.core.window.rolling import _Rolling_and_Expanding + + +class Expanding(_Rolling_and_Expanding): + """ + Provide expanding transformations. + + Parameters + ---------- + min_periods : int, default 1 + Minimum number of observations in window required to have a value + (otherwise result is NA). + center : bool, default False + Set the labels at the center of the window. + axis : int or str, default 0 + + Returns + ------- + a Window sub-classed for the particular operation + + See Also + -------- + rolling : Provides rolling window calculations. + ewm : Provides exponential weighted functions. + + Notes + ----- + By default, the result is set to the right edge of the window. This can be + changed to the center of the window by setting ``center=True``. + + Examples + -------- + + >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) + B + 0 0.0 + 1 1.0 + 2 2.0 + 3 NaN + 4 4.0 + + >>> df.expanding(2).sum() + B + 0 NaN + 1 1.0 + 2 3.0 + 3 3.0 + 4 7.0 + """ + + _attributes = ["min_periods", "center", "axis"] + + def __init__(self, obj, min_periods=1, center=False, axis=0, **kwargs): + super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis) + + @property + def _constructor(self): + return Expanding + + def _get_window(self, other=None, **kwargs): + """ + Get the window length over which to perform some operation. + + Parameters + ---------- + other : object, default None + The other object that is involved in the operation. + Such an object is involved for operations like covariance. + + Returns + ------- + window : int + The window length. + """ + axis = self.obj._get_axis(self.axis) + length = len(axis) + (other is not None) * len(axis) + + other = self.min_periods or -1 + return max(length, other) + + _agg_see_also_doc = dedent( + """ + See Also + -------- + DataFrame.expanding.aggregate + DataFrame.rolling.aggregate + DataFrame.aggregate + """ + ) + + _agg_examples_doc = dedent( + """ + Examples + -------- + + >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) + >>> df + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.004295 0.905829 -0.954544 + 2 0.735167 -0.165272 -1.619346 + 3 -0.702657 -1.340923 -0.706334 + 4 -0.246845 0.211596 -0.901819 + 5 2.463718 3.157577 -1.380906 + 6 -1.142255 2.340594 -0.039875 + 7 1.396598 -1.647453 1.677227 + 8 -0.543425 1.761277 -0.220481 + 9 -0.640505 0.289374 -1.550670 + + >>> df.ewm(alpha=0.5).mean() + A B C + 0 -2.385977 -0.102758 0.438822 + 1 -1.464856 0.569633 -0.490089 + 2 -0.207700 0.149687 -1.135379 + 3 -0.471677 -0.645305 -0.906555 + 4 -0.355635 -0.203033 -0.904111 + 5 1.076417 1.503943 -1.146293 + 6 -0.041654 1.925562 -0.588728 + 7 0.680292 0.132049 0.548693 + 8 0.067236 0.948257 0.163353 + 9 -0.286980 0.618493 -0.694496 + """ + ) + + @Substitution( + see_also=_agg_see_also_doc, + examples=_agg_examples_doc, + versionadded="", + klass="Series/Dataframe", + axis="", + ) + @Appender(_shared_docs["aggregate"]) + def aggregate(self, arg, *args, **kwargs): + return super().aggregate(arg, *args, **kwargs) + + agg = aggregate + + @Substitution(name="expanding") + @Appender(_shared_docs["count"]) + def count(self, **kwargs): + return super().count(**kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["apply"]) + def apply(self, func, raw=None, args=(), kwargs={}): + return super().apply(func, raw=raw, args=args, kwargs=kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["sum"]) + def sum(self, *args, **kwargs): + nv.validate_expanding_func("sum", args, kwargs) + return super().sum(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["max"]) + def max(self, *args, **kwargs): + nv.validate_expanding_func("max", args, kwargs) + return super().max(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["min"]) + def min(self, *args, **kwargs): + nv.validate_expanding_func("min", args, kwargs) + return super().min(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["mean"]) + def mean(self, *args, **kwargs): + nv.validate_expanding_func("mean", args, kwargs) + return super().mean(*args, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["median"]) + def median(self, **kwargs): + return super().median(**kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["std"]) + def std(self, ddof=1, *args, **kwargs): + nv.validate_expanding_func("std", args, kwargs) + return super().std(ddof=ddof, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["var"]) + def var(self, ddof=1, *args, **kwargs): + nv.validate_expanding_func("var", args, kwargs) + return super().var(ddof=ddof, **kwargs) + + @Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["skew"]) + def skew(self, **kwargs): + return super().skew(**kwargs) + + _agg_doc = dedent( + """ + Examples + -------- + + The example below will show an expanding calculation with a window size of + four matching the equivalent function call using `scipy.stats`. + + >>> arr = [1, 2, 3, 4, 999] + >>> import scipy.stats + >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits + >>> print(fmt.format(scipy.stats.kurtosis(arr[:-1], bias=False))) + -1.200000 + >>> print(fmt.format(scipy.stats.kurtosis(arr, bias=False))) + 4.999874 + >>> s = pd.Series(arr) + >>> s.expanding(4).kurt() + 0 NaN + 1 NaN + 2 NaN + 3 -1.200000 + 4 4.999874 + dtype: float64 + """ + ) + + @Appender(_agg_doc) + @Substitution(name="expanding") + @Appender(_shared_docs["kurt"]) + def kurt(self, **kwargs): + return super().kurt(**kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["quantile"]) + def quantile(self, quantile, interpolation="linear", **kwargs): + return super().quantile( + quantile=quantile, interpolation=interpolation, **kwargs + ) + + @Substitution(name="expanding") + @Appender(_doc_template) + @Appender(_shared_docs["cov"]) + def cov(self, other=None, pairwise=None, ddof=1, **kwargs): + return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) + + @Substitution(name="expanding") + @Appender(_shared_docs["corr"]) + def corr(self, other=None, pairwise=None, **kwargs): + return super().corr(other=other, pairwise=pairwise, **kwargs) + + +class ExpandingGroupby(_GroupByMixin, Expanding): + """ + Provide a expanding groupby implementation. + """ + + @property + def _constructor(self): + return Expanding diff --git a/pandas/core/window.py b/pandas/core/window/rolling.py similarity index 66% rename from pandas/core/window.py rename to pandas/core/window/rolling.py index 3e3f17369db7b..323089b3fdf6b 100644 --- a/pandas/core/window.py +++ b/pandas/core/window/rolling.py @@ -2,7 +2,6 @@ Provide a generic structure to support window functions, similar to how we have a Groupby object. """ -from collections import defaultdict from datetime import timedelta from textwrap import dedent from typing import Callable, List, Optional, Set, Union @@ -38,22 +37,17 @@ from pandas._typing import Axis, FrameOrSeries, Scalar from pandas.core.base import DataError, PandasObject, SelectionMixin import pandas.core.common as com -from pandas.core.generic import _shared_docs -from pandas.core.groupby.base import GroupByMixin -from pandas.core.index import Index, MultiIndex, ensure_index - -_shared_docs = dict(**_shared_docs) -_doc_template = """ - Returns - ------- - Series or DataFrame - Return type is determined by the caller. - - See Also - -------- - Series.%(name)s : Series %(name)s. - DataFrame.%(name)s : DataFrame %(name)s. -""" +from pandas.core.index import Index, ensure_index +from pandas.core.window.common import ( + _doc_template, + _flex_binary_moment, + _GroupByMixin, + _offset, + _require_min_periods, + _shared_docs, + _use_window, + _zsqrt, +) class _Window(PandasObject, SelectionMixin): @@ -121,6 +115,8 @@ def validate(self): "neither", ]: raise ValueError("closed must be 'right', 'left', 'both' or 'neither'") + if not isinstance(self.obj, (ABCSeries, ABCDataFrame)): + raise TypeError("invalid type: {}".format(type(self))) def _create_blocks(self): """ @@ -929,44 +925,6 @@ def mean(self, *args, **kwargs): return self._apply("roll_weighted_mean", **kwargs) -class _GroupByMixin(GroupByMixin): - """ - Provide the groupby facilities. - """ - - def __init__(self, obj, *args, **kwargs): - parent = kwargs.pop("parent", None) # noqa - groupby = kwargs.pop("groupby", None) - if groupby is None: - groupby, obj = obj, obj.obj - self._groupby = groupby - self._groupby.mutated = True - self._groupby.grouper.mutated = True - super().__init__(obj, *args, **kwargs) - - count = GroupByMixin._dispatch("count") - corr = GroupByMixin._dispatch("corr", other=None, pairwise=None) - cov = GroupByMixin._dispatch("cov", other=None, pairwise=None) - - def _apply( - self, func, name=None, window=None, center=None, check_minp=None, **kwargs - ): - """ - Dispatch to apply; we are stripping all of the _apply kwargs and - performing the original function call on the grouped object. - """ - - def f(x, name=name, *args): - x = self._shallow_copy(x) - - if isinstance(name, str): - return getattr(x, name)(*args, **kwargs) - - return x.apply(name, *args, **kwargs) - - return self._groupby.apply(f) - - class _Rolling(_Window): @property def _constructor(self): @@ -1949,6 +1907,9 @@ def corr(self, other=None, pairwise=None, **kwargs): return super().corr(other=other, pairwise=pairwise, **kwargs) +Rolling.__doc__ = Window.__doc__ + + class RollingGroupby(_GroupByMixin, Rolling): """ Provide a rolling groupby implementation. @@ -1976,883 +1937,3 @@ def _validate_monotonic(self): level. """ pass - - -class Expanding(_Rolling_and_Expanding): - """ - Provide expanding transformations. - - Parameters - ---------- - min_periods : int, default 1 - Minimum number of observations in window required to have a value - (otherwise result is NA). - center : bool, default False - Set the labels at the center of the window. - axis : int or str, default 0 - - Returns - ------- - a Window sub-classed for the particular operation - - See Also - -------- - rolling : Provides rolling window calculations. - ewm : Provides exponential weighted functions. - - Notes - ----- - By default, the result is set to the right edge of the window. This can be - changed to the center of the window by setting ``center=True``. - - Examples - -------- - - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) - B - 0 0.0 - 1 1.0 - 2 2.0 - 3 NaN - 4 4.0 - - >>> df.expanding(2).sum() - B - 0 NaN - 1 1.0 - 2 3.0 - 3 3.0 - 4 7.0 - """ - - _attributes = ["min_periods", "center", "axis"] - - def __init__(self, obj, min_periods=1, center=False, axis=0, **kwargs): - super().__init__(obj=obj, min_periods=min_periods, center=center, axis=axis) - - @property - def _constructor(self): - return Expanding - - def _get_window(self, other=None, **kwargs): - """ - Get the window length over which to perform some operation. - - Parameters - ---------- - other : object, default None - The other object that is involved in the operation. - Such an object is involved for operations like covariance. - - Returns - ------- - window : int - The window length. - """ - axis = self.obj._get_axis(self.axis) - length = len(axis) + (other is not None) * len(axis) - - other = self.min_periods or -1 - return max(length, other) - - _agg_see_also_doc = dedent( - """ - See Also - -------- - DataFrame.expanding.aggregate - DataFrame.rolling.aggregate - DataFrame.aggregate - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) - >>> df - A B C - 0 -2.385977 -0.102758 0.438822 - 1 -1.004295 0.905829 -0.954544 - 2 0.735167 -0.165272 -1.619346 - 3 -0.702657 -1.340923 -0.706334 - 4 -0.246845 0.211596 -0.901819 - 5 2.463718 3.157577 -1.380906 - 6 -1.142255 2.340594 -0.039875 - 7 1.396598 -1.647453 1.677227 - 8 -0.543425 1.761277 -0.220481 - 9 -0.640505 0.289374 -1.550670 - - >>> df.ewm(alpha=0.5).mean() - A B C - 0 -2.385977 -0.102758 0.438822 - 1 -1.464856 0.569633 -0.490089 - 2 -0.207700 0.149687 -1.135379 - 3 -0.471677 -0.645305 -0.906555 - 4 -0.355635 -0.203033 -0.904111 - 5 1.076417 1.503943 -1.146293 - 6 -0.041654 1.925562 -0.588728 - 7 0.680292 0.132049 0.548693 - 8 0.067236 0.948257 0.163353 - 9 -0.286980 0.618493 -0.694496 - """ - ) - - @Substitution( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - versionadded="", - klass="Series/Dataframe", - axis="", - ) - @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) - - agg = aggregate - - @Substitution(name="expanding") - @Appender(_shared_docs["count"]) - def count(self, **kwargs): - return super().count(**kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["apply"]) - def apply(self, func, raw=None, args=(), kwargs={}): - return super().apply(func, raw=raw, args=args, kwargs=kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["sum"]) - def sum(self, *args, **kwargs): - nv.validate_expanding_func("sum", args, kwargs) - return super().sum(*args, **kwargs) - - @Substitution(name="expanding") - @Appender(_doc_template) - @Appender(_shared_docs["max"]) - def max(self, *args, **kwargs): - nv.validate_expanding_func("max", args, kwargs) - return super().max(*args, **kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["min"]) - def min(self, *args, **kwargs): - nv.validate_expanding_func("min", args, kwargs) - return super().min(*args, **kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["mean"]) - def mean(self, *args, **kwargs): - nv.validate_expanding_func("mean", args, kwargs) - return super().mean(*args, **kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["median"]) - def median(self, **kwargs): - return super().median(**kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["std"]) - def std(self, ddof=1, *args, **kwargs): - nv.validate_expanding_func("std", args, kwargs) - return super().std(ddof=ddof, **kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["var"]) - def var(self, ddof=1, *args, **kwargs): - nv.validate_expanding_func("var", args, kwargs) - return super().var(ddof=ddof, **kwargs) - - @Substitution(name="expanding") - @Appender(_doc_template) - @Appender(_shared_docs["skew"]) - def skew(self, **kwargs): - return super().skew(**kwargs) - - _agg_doc = dedent( - """ - Examples - -------- - - The example below will show an expanding calculation with a window size of - four matching the equivalent function call using `scipy.stats`. - - >>> arr = [1, 2, 3, 4, 999] - >>> import scipy.stats - >>> fmt = "{0:.6f}" # limit the printed precision to 6 digits - >>> print(fmt.format(scipy.stats.kurtosis(arr[:-1], bias=False))) - -1.200000 - >>> print(fmt.format(scipy.stats.kurtosis(arr, bias=False))) - 4.999874 - >>> s = pd.Series(arr) - >>> s.expanding(4).kurt() - 0 NaN - 1 NaN - 2 NaN - 3 -1.200000 - 4 4.999874 - dtype: float64 - """ - ) - - @Appender(_agg_doc) - @Substitution(name="expanding") - @Appender(_shared_docs["kurt"]) - def kurt(self, **kwargs): - return super().kurt(**kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["quantile"]) - def quantile(self, quantile, interpolation="linear", **kwargs): - return super().quantile( - quantile=quantile, interpolation=interpolation, **kwargs - ) - - @Substitution(name="expanding") - @Appender(_doc_template) - @Appender(_shared_docs["cov"]) - def cov(self, other=None, pairwise=None, ddof=1, **kwargs): - return super().cov(other=other, pairwise=pairwise, ddof=ddof, **kwargs) - - @Substitution(name="expanding") - @Appender(_shared_docs["corr"]) - def corr(self, other=None, pairwise=None, **kwargs): - return super().corr(other=other, pairwise=pairwise, **kwargs) - - -class ExpandingGroupby(_GroupByMixin, Expanding): - """ - Provide a expanding groupby implementation. - """ - - @property - def _constructor(self): - return Expanding - - -_bias_template = """ - Parameters - ---------- - bias : bool, default False - Use a standard estimation bias correction. - *args, **kwargs - Arguments and keyword arguments to be passed into func. -""" - -_pairwise_template = """ - Parameters - ---------- - other : Series, DataFrame, or ndarray, optional - If not supplied then will default to self and produce pairwise - output. - pairwise : bool, default None - If False then only matching columns between self and other will be - used and the output will be a DataFrame. - If True then all pairwise combinations will be calculated and the - output will be a MultiIndex DataFrame in the case of DataFrame - inputs. In the case of missing elements, only complete pairwise - observations will be used. - bias : bool, default False - Use a standard estimation bias correction. - **kwargs - Keyword arguments to be passed into func. -""" - - -class EWM(_Rolling): - r""" - Provide exponential weighted functions. - - Parameters - ---------- - com : float, optional - Specify decay in terms of center of mass, - :math:`\alpha = 1 / (1 + com),\text{ for } com \geq 0`. - span : float, optional - Specify decay in terms of span, - :math:`\alpha = 2 / (span + 1),\text{ for } span \geq 1`. - halflife : float, optional - Specify decay in terms of half-life, - :math:`\alpha = 1 - exp(log(0.5) / halflife),\text{for} halflife > 0`. - alpha : float, optional - Specify smoothing factor :math:`\alpha` directly, - :math:`0 < \alpha \leq 1`. - min_periods : int, default 0 - Minimum number of observations in window required to have a value - (otherwise result is NA). - adjust : bool, default True - Divide by decaying adjustment factor in beginning periods to account - for imbalance in relative weightings - (viewing EWMA as a moving average). - ignore_na : bool, default False - Ignore missing values when calculating weights; - specify True to reproduce pre-0.15.0 behavior. - axis : {0 or 'index', 1 or 'columns'}, default 0 - The axis to use. The value 0 identifies the rows, and 1 - identifies the columns. - - Returns - ------- - DataFrame - A Window sub-classed for the particular operation. - - See Also - -------- - rolling : Provides rolling window calculations. - expanding : Provides expanding transformations. - - Notes - ----- - Exactly one of center of mass, span, half-life, and alpha must be provided. - Allowed values and relationship between the parameters are specified in the - parameter descriptions above; see the link at the end of this section for - a detailed explanation. - - When adjust is True (default), weighted averages are calculated using - weights (1-alpha)**(n-1), (1-alpha)**(n-2), ..., 1-alpha, 1. - - When adjust is False, weighted averages are calculated recursively as: - weighted_average[0] = arg[0]; - weighted_average[i] = (1-alpha)*weighted_average[i-1] + alpha*arg[i]. - - When ignore_na is False (default), weights are based on absolute positions. - For example, the weights of x and y used in calculating the final weighted - average of [x, None, y] are (1-alpha)**2 and 1 (if adjust is True), and - (1-alpha)**2 and alpha (if adjust is False). - - When ignore_na is True (reproducing pre-0.15.0 behavior), weights are based - on relative positions. For example, the weights of x and y used in - calculating the final weighted average of [x, None, y] are 1-alpha and 1 - (if adjust is True), and 1-alpha and alpha (if adjust is False). - - More details can be found at - http://pandas.pydata.org/pandas-docs/stable/user_guide/computation.html#exponentially-weighted-windows - - Examples - -------- - - >>> df = pd.DataFrame({'B': [0, 1, 2, np.nan, 4]}) - >>> df - B - 0 0.0 - 1 1.0 - 2 2.0 - 3 NaN - 4 4.0 - - >>> df.ewm(com=0.5).mean() - B - 0 0.000000 - 1 0.750000 - 2 1.615385 - 3 1.615385 - 4 3.670213 - """ - _attributes = ["com", "min_periods", "adjust", "ignore_na", "axis"] - - def __init__( - self, - obj, - com=None, - span=None, - halflife=None, - alpha=None, - min_periods=0, - adjust=True, - ignore_na=False, - axis=0, - ): - self.obj = obj - self.com = _get_center_of_mass(com, span, halflife, alpha) - self.min_periods = min_periods - self.adjust = adjust - self.ignore_na = ignore_na - self.axis = axis - self.on = None - - @property - def _constructor(self): - return EWM - - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.DataFrame.rolling.aggregate - """ - ) - - _agg_examples_doc = dedent( - """ - Examples - -------- - - >>> df = pd.DataFrame(np.random.randn(10, 3), columns=['A', 'B', 'C']) - >>> df - A B C - 0 -2.385977 -0.102758 0.438822 - 1 -1.004295 0.905829 -0.954544 - 2 0.735167 -0.165272 -1.619346 - 3 -0.702657 -1.340923 -0.706334 - 4 -0.246845 0.211596 -0.901819 - 5 2.463718 3.157577 -1.380906 - 6 -1.142255 2.340594 -0.039875 - 7 1.396598 -1.647453 1.677227 - 8 -0.543425 1.761277 -0.220481 - 9 -0.640505 0.289374 -1.550670 - - >>> df.ewm(alpha=0.5).mean() - A B C - 0 -2.385977 -0.102758 0.438822 - 1 -1.464856 0.569633 -0.490089 - 2 -0.207700 0.149687 -1.135379 - 3 -0.471677 -0.645305 -0.906555 - 4 -0.355635 -0.203033 -0.904111 - 5 1.076417 1.503943 -1.146293 - 6 -0.041654 1.925562 -0.588728 - 7 0.680292 0.132049 0.548693 - 8 0.067236 0.948257 0.163353 - 9 -0.286980 0.618493 -0.694496 - """ - ) - - @Substitution( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - versionadded="", - klass="Series/Dataframe", - axis="", - ) - @Appender(_shared_docs["aggregate"]) - def aggregate(self, arg, *args, **kwargs): - return super().aggregate(arg, *args, **kwargs) - - agg = aggregate - - def _apply(self, func, **kwargs): - """ - Rolling statistical measure using supplied function. Designed to be - used with passed-in Cython array-based functions. - - Parameters - ---------- - func : str/callable to apply - - Returns - ------- - y : same type as input argument - """ - blocks, obj = self._create_blocks() - block_list = list(blocks) - - results = [] - exclude = [] - for i, b in enumerate(blocks): - try: - values = self._prep_values(b.values) - - except (TypeError, NotImplementedError): - if isinstance(obj, ABCDataFrame): - exclude.extend(b.columns) - del block_list[i] - continue - else: - raise DataError("No numeric types to aggregate") - - if values.size == 0: - results.append(values.copy()) - continue - - # if we have a string function name, wrap it - if isinstance(func, str): - cfunc = getattr(libwindow, func, None) - if cfunc is None: - raise ValueError( - "we do not support this function " - "in libwindow.{func}".format(func=func) - ) - - def func(arg): - return cfunc( - arg, - self.com, - int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - ) - - results.append(np.apply_along_axis(func, self.axis, values)) - - return self._wrap_results(results, block_list, obj, exclude) - - @Substitution(name="ewm") - @Appender(_doc_template) - def mean(self, *args, **kwargs): - """ - Exponential weighted moving average. - - Parameters - ---------- - *args, **kwargs - Arguments and keyword arguments to be passed into func. - """ - nv.validate_window_func("mean", args, kwargs) - return self._apply("ewma", **kwargs) - - @Substitution(name="ewm") - @Appender(_doc_template) - @Appender(_bias_template) - def std(self, bias=False, *args, **kwargs): - """ - Exponential weighted moving stddev. - """ - nv.validate_window_func("std", args, kwargs) - return _zsqrt(self.var(bias=bias, **kwargs)) - - vol = std - - @Substitution(name="ewm") - @Appender(_doc_template) - @Appender(_bias_template) - def var(self, bias=False, *args, **kwargs): - """ - Exponential weighted moving variance. - """ - nv.validate_window_func("var", args, kwargs) - - def f(arg): - return libwindow.ewmcov( - arg, - arg, - self.com, - int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - int(bias), - ) - - return self._apply(f, **kwargs) - - @Substitution(name="ewm") - @Appender(_doc_template) - @Appender(_pairwise_template) - def cov(self, other=None, pairwise=None, bias=False, **kwargs): - """ - Exponential weighted sample covariance. - """ - if other is None: - other = self._selected_obj - # only default unset - pairwise = True if pairwise is None else pairwise - other = self._shallow_copy(other) - - def _get_cov(X, Y): - X = self._shallow_copy(X) - Y = self._shallow_copy(Y) - cov = libwindow.ewmcov( - X._prep_values(), - Y._prep_values(), - self.com, - int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - int(bias), - ) - return X._wrap_result(cov) - - return _flex_binary_moment( - self._selected_obj, other._selected_obj, _get_cov, pairwise=bool(pairwise) - ) - - @Substitution(name="ewm") - @Appender(_doc_template) - @Appender(_pairwise_template) - def corr(self, other=None, pairwise=None, **kwargs): - """ - Exponential weighted sample correlation. - """ - if other is None: - other = self._selected_obj - # only default unset - pairwise = True if pairwise is None else pairwise - other = self._shallow_copy(other) - - def _get_corr(X, Y): - X = self._shallow_copy(X) - Y = self._shallow_copy(Y) - - def _cov(x, y): - return libwindow.ewmcov( - x, - y, - self.com, - int(self.adjust), - int(self.ignore_na), - int(self.min_periods), - 1, - ) - - x_values = X._prep_values() - y_values = Y._prep_values() - with np.errstate(all="ignore"): - cov = _cov(x_values, y_values) - x_var = _cov(x_values, x_values) - y_var = _cov(y_values, y_values) - corr = cov / _zsqrt(x_var * y_var) - return X._wrap_result(corr) - - return _flex_binary_moment( - self._selected_obj, other._selected_obj, _get_corr, pairwise=bool(pairwise) - ) - - -# Helper Funcs - - -def _flex_binary_moment(arg1, arg2, f, pairwise=False): - - if not ( - isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame)) - and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame)) - ): - raise TypeError( - "arguments to moment function must be of type " - "np.ndarray/Series/DataFrame" - ) - - if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance( - arg2, (np.ndarray, ABCSeries) - ): - X, Y = _prep_binary(arg1, arg2) - return f(X, Y) - - elif isinstance(arg1, ABCDataFrame): - from pandas import DataFrame - - def dataframe_from_int_dict(data, frame_template): - result = DataFrame(data, index=frame_template.index) - if len(result.columns) > 0: - result.columns = frame_template.columns[result.columns] - return result - - results = {} - if isinstance(arg2, ABCDataFrame): - if pairwise is False: - if arg1 is arg2: - # special case in order to handle duplicate column names - for i, col in enumerate(arg1.columns): - results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i]) - return dataframe_from_int_dict(results, arg1) - else: - if not arg1.columns.is_unique: - raise ValueError("'arg1' columns are not unique") - if not arg2.columns.is_unique: - raise ValueError("'arg2' columns are not unique") - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - X, Y = arg1.align(arg2, join="outer") - X = X + 0 * Y - Y = Y + 0 * X - - with warnings.catch_warnings(record=True): - warnings.simplefilter("ignore", RuntimeWarning) - res_columns = arg1.columns.union(arg2.columns) - for col in res_columns: - if col in X and col in Y: - results[col] = f(X[col], Y[col]) - return DataFrame(results, index=X.index, columns=res_columns) - elif pairwise is True: - results = defaultdict(dict) - for i, k1 in enumerate(arg1.columns): - for j, k2 in enumerate(arg2.columns): - if j < i and arg2 is arg1: - # Symmetric case - results[i][j] = results[j][i] - else: - results[i][j] = f( - *_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j]) - ) - - from pandas import concat - - result_index = arg1.index.union(arg2.index) - if len(result_index): - - # construct result frame - result = concat( - [ - concat( - [results[i][j] for j, c in enumerate(arg2.columns)], - ignore_index=True, - ) - for i, c in enumerate(arg1.columns) - ], - ignore_index=True, - axis=1, - ) - result.columns = arg1.columns - - # set the index and reorder - if arg2.columns.nlevels > 1: - result.index = MultiIndex.from_product( - arg2.columns.levels + [result_index] - ) - result = result.reorder_levels([2, 0, 1]).sort_index() - else: - result.index = MultiIndex.from_product( - [range(len(arg2.columns)), range(len(result_index))] - ) - result = result.swaplevel(1, 0).sort_index() - result.index = MultiIndex.from_product( - [result_index] + [arg2.columns] - ) - else: - - # empty result - result = DataFrame( - index=MultiIndex( - levels=[arg1.index, arg2.columns], codes=[[], []] - ), - columns=arg2.columns, - dtype="float64", - ) - - # reset our index names to arg1 names - # reset our column names to arg2 names - # careful not to mutate the original names - result.columns = result.columns.set_names(arg1.columns.names) - result.index = result.index.set_names( - result_index.names + arg2.columns.names - ) - - return result - - else: - raise ValueError("'pairwise' is not True/False") - else: - results = { - i: f(*_prep_binary(arg1.iloc[:, i], arg2)) - for i, col in enumerate(arg1.columns) - } - return dataframe_from_int_dict(results, arg1) - - else: - return _flex_binary_moment(arg2, arg1, f) - - -def _get_center_of_mass(comass, span, halflife, alpha): - valid_count = com.count_not_none(comass, span, halflife, alpha) - if valid_count > 1: - raise ValueError("comass, span, halflife, and alpha are mutually exclusive") - - # Convert to center of mass; domain checks ensure 0 < alpha <= 1 - if comass is not None: - if comass < 0: - raise ValueError("comass must satisfy: comass >= 0") - elif span is not None: - if span < 1: - raise ValueError("span must satisfy: span >= 1") - comass = (span - 1) / 2.0 - elif halflife is not None: - if halflife <= 0: - raise ValueError("halflife must satisfy: halflife > 0") - decay = 1 - np.exp(np.log(0.5) / halflife) - comass = 1 / decay - 1 - elif alpha is not None: - if alpha <= 0 or alpha > 1: - raise ValueError("alpha must satisfy: 0 < alpha <= 1") - comass = (1.0 - alpha) / alpha - else: - raise ValueError("Must pass one of comass, span, halflife, or alpha") - - return float(comass) - - -def _offset(window, center): - if not is_integer(window): - window = len(window) - offset = (window - 1) / 2.0 if center else 0 - try: - return int(offset) - except TypeError: - return offset.astype(int) - - -def _require_min_periods(p): - def _check_func(minp, window): - if minp is None: - return window - else: - return max(p, minp) - - return _check_func - - -def _use_window(minp, window): - if minp is None: - return window - else: - return minp - - -def _zsqrt(x): - with np.errstate(all="ignore"): - result = np.sqrt(x) - mask = x < 0 - - if isinstance(x, ABCDataFrame): - if mask.values.any(): - result[mask] = 0 - else: - if mask.any(): - result[mask] = 0 - - return result - - -def _prep_binary(arg1, arg2): - if not isinstance(arg2, type(arg1)): - raise Exception("Input arrays must be of the same type!") - - # mask out values, this also makes a common index... - X = arg1 + 0 * arg2 - Y = arg2 + 0 * arg1 - - return X, Y - - -# Top-level exports - - -def rolling(obj, win_type=None, **kwds): - if not isinstance(obj, (ABCSeries, ABCDataFrame)): - raise TypeError("invalid type: %s" % type(obj)) - - if win_type is not None: - return Window(obj, win_type=win_type, **kwds) - - return Rolling(obj, **kwds) - - -rolling.__doc__ = Window.__doc__ - - -def expanding(obj, **kwds): - if not isinstance(obj, (ABCSeries, ABCDataFrame)): - raise TypeError("invalid type: %s" % type(obj)) - - return Expanding(obj, **kwds) - - -expanding.__doc__ = Expanding.__doc__ - - -def ewm(obj, **kwds): - if not isinstance(obj, (ABCSeries, ABCDataFrame)): - raise TypeError("invalid type: %s" % type(obj)) - - return EWM(obj, **kwds) - - -ewm.__doc__ = EWM.__doc__ diff --git a/pandas/tests/window/test_ewm.py b/pandas/tests/window/test_ewm.py index a05b567adad7a..1683fda500f85 100644 --- a/pandas/tests/window/test_ewm.py +++ b/pandas/tests/window/test_ewm.py @@ -4,7 +4,7 @@ from pandas.errors import UnsupportedFunctionCall from pandas import DataFrame, Series -import pandas.core.window as rwindow +from pandas.core.window import EWM from pandas.tests.window.common import Base @@ -60,7 +60,7 @@ def test_constructor(self, which): @pytest.mark.parametrize("method", ["std", "mean", "var"]) def test_numpy_compat(self, method): # see gh-12811 - e = rwindow.EWM(Series([2, 4, 6]), alpha=0.5) + e = EWM(Series([2, 4, 6]), alpha=0.5) msg = "numpy operations are not valid with window objects" diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 1e92c981964c5..098acdff93ac6 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import DataFrame, Series -import pandas.core.window as rwindow +from pandas.core.window import Expanding from pandas.tests.window.common import Base import pandas.util.testing as tm @@ -42,7 +42,7 @@ def test_constructor(self, which): @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 - e = rwindow.Expanding(Series([2, 4, 6]), window=2) + e = Expanding(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" diff --git a/pandas/tests/window/test_moments.py b/pandas/tests/window/test_moments.py index d860859958254..3d6cd7d10bd10 100644 --- a/pandas/tests/window/test_moments.py +++ b/pandas/tests/window/test_moments.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, Series, concat, isna, notna -import pandas.core.window as rwindow +from pandas.core.window.common import _flex_binary_moment from pandas.tests.window.common import Base import pandas.util.testing as tm @@ -1878,7 +1878,7 @@ def test_flex_binary_moment(self): " np.ndarray/Series/DataFrame" ) with pytest.raises(TypeError, match=msg): - rwindow._flex_binary_moment(5, 6, None) + _flex_binary_moment(5, 6, None) def test_corr_sanity(self): # GH 3155 diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index f0787ab3d191f..b4787bf25e3bb 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -8,7 +8,7 @@ import pandas as pd from pandas import DataFrame, Series -import pandas.core.window as rwindow +from pandas.core.window import Rolling from pandas.tests.window.common import Base import pandas.util.testing as tm @@ -101,7 +101,7 @@ def test_constructor_timedelta_window_and_minperiods(self, window, raw): @pytest.mark.parametrize("method", ["std", "mean", "sum", "max", "min", "var"]) def test_numpy_compat(self, method): # see gh-12811 - r = rwindow.Rolling(Series([2, 4, 6]), window=2) + r = Rolling(Series([2, 4, 6]), window=2) msg = "numpy operations are not valid with window objects" diff --git a/pandas/tests/window/test_window.py b/pandas/tests/window/test_window.py index a6a56c98a9377..5692404205012 100644 --- a/pandas/tests/window/test_window.py +++ b/pandas/tests/window/test_window.py @@ -6,7 +6,7 @@ import pandas as pd from pandas import Series -import pandas.core.window as rwindow +from pandas.core.window import Window from pandas.tests.window.common import Base @@ -50,7 +50,7 @@ def test_constructor_with_win_type(self, which, win_types): @pytest.mark.parametrize("method", ["sum", "mean"]) def test_numpy_compat(self, method): # see gh-12811 - w = rwindow.Window(Series([2, 4, 6]), window=[0, 2]) + w = Window(Series([2, 4, 6]), window=[0, 2]) msg = "numpy operations are not valid with window objects"