REF: pandas/core/window.py into multiple files (#27736)
mroeschke authored and jreback committed Aug 7, 2019
1 parent 9724ace commit 3bf35c6
Showing 12 changed files with 979 additions and 955 deletions.
7 changes: 6 additions & 1 deletion doc/source/reference/window.rst
@@ -5,14 +5,15 @@
======
Window
======
- .. currentmodule:: pandas.core.window

Rolling objects are returned by ``.rolling`` calls: :func:`pandas.DataFrame.rolling`, :func:`pandas.Series.rolling`, etc.
Expanding objects are returned by ``.expanding`` calls: :func:`pandas.DataFrame.expanding`, :func:`pandas.Series.expanding`, etc.
EWM objects are returned by ``.ewm`` calls: :func:`pandas.DataFrame.ewm`, :func:`pandas.Series.ewm`, etc.

Standard moving window functions
--------------------------------
+ .. currentmodule:: pandas.core.window.rolling

.. autosummary::
:toctree: api/

@@ -38,6 +39,8 @@ Standard moving window functions

Standard expanding window functions
-----------------------------------
+ .. currentmodule:: pandas.core.window.expanding

.. autosummary::
:toctree: api/

@@ -59,6 +62,8 @@ Standard expanding window functions

Exponentially-weighted moving window functions
----------------------------------------------
+ .. currentmodule:: pandas.core.window.ewm

.. autosummary::
:toctree: api/

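The ``currentmodule`` directives above now point at the new submodules (``rolling``, ``expanding``, ``ewm``); the surrounding prose and autosummary entries are unchanged, and the three entry points behave as before. A minimal usage sketch (illustrative only, not part of this commit):

import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0])

r = s.rolling(window=2)   # Rolling object
e = s.expanding()         # Expanding object
w = s.ewm(span=3)         # EWM object

print(r.mean())   # moving mean over a 2-observation window
print(e.sum())    # cumulative (expanding) sum
print(w.mean())   # exponentially weighted mean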
29 changes: 20 additions & 9 deletions pandas/core/generic.py
@@ -10683,9 +10683,9 @@ def _add_series_or_dataframe_operations(cls):
the doc strings again.
"""

- from pandas.core import window as rwindow
+ from pandas.core.window import EWM, Expanding, Rolling, Window

- @Appender(rwindow.rolling.__doc__)
+ @Appender(Rolling.__doc__)
def rolling(
self,
window,
@@ -10697,7 +10697,20 @@ def rolling(
closed=None,
):
axis = self._get_axis_number(axis)
- return rwindow.rolling(
+
+ if win_type is not None:
+ return Window(
+ self,
+ window=window,
+ min_periods=min_periods,
+ center=center,
+ win_type=win_type,
+ on=on,
+ axis=axis,
+ closed=closed,
+ )
+
+ return Rolling(
self,
window=window,
min_periods=min_periods,
@@ -10710,16 +10723,14 @@ def rolling(

cls.rolling = rolling

- @Appender(rwindow.expanding.__doc__)
+ @Appender(Expanding.__doc__)
def expanding(self, min_periods=1, center=False, axis=0):
axis = self._get_axis_number(axis)
- return rwindow.expanding(
- self, min_periods=min_periods, center=center, axis=axis
- )
+ return Expanding(self, min_periods=min_periods, center=center, axis=axis)

cls.expanding = expanding

- @Appender(rwindow.ewm.__doc__)
+ @Appender(EWM.__doc__)
def ewm(
self,
com=None,
@@ -10732,7 +10743,7 @@ def ewm(
axis=0,
):
axis = self._get_axis_number(axis)
- return rwindow.ewm(
+ return EWM(
self,
com=com,
span=span,
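With this change, ``DataFrame.rolling``/``Series.rolling`` construct the window object directly: a ``Window`` when ``win_type`` is given, otherwise a ``Rolling``, instead of going through the removed ``rwindow.rolling`` factory. A small sketch of the dispatch (illustrative; weighted windows require scipy):

import pandas as pd

df = pd.DataFrame({"a": [1.0, 2.0, 3.0, 4.0]})

plain = df.rolling(window=2)                        # no win_type -> Rolling
weighted = df.rolling(window=2, win_type="triang")  # win_type set -> Window (needs scipy)

print(type(plain).__name__)     # expected: "Rolling"
print(type(weighted).__name__)  # expected: "Window"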
3 changes: 3 additions & 0 deletions pandas/core/window/__init__.py
@@ -0,0 +1,3 @@
from pandas.core.window.ewm import EWM # noqa:F401
from pandas.core.window.expanding import Expanding, ExpandingGroupby # noqa:F401
from pandas.core.window.rolling import Rolling, RollingGroupby, Window # noqa:F401
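The package ``__init__`` re-exports the classes, so code that imported them from the internal ``pandas.core.window`` module keeps working even though the implementations now live in submodules. A quick check of the new layout (illustrative):

from pandas.core.window import EWM, Expanding, Rolling, Window

print(Rolling.__module__)    # expected: "pandas.core.window.rolling"
print(Window.__module__)     # expected: "pandas.core.window.rolling"
print(Expanding.__module__)  # expected: "pandas.core.window.expanding"
print(EWM.__module__)        # expected: "pandas.core.window.ewm"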
276 changes: 276 additions & 0 deletions pandas/core/window/common.py
@@ -0,0 +1,276 @@
"""Common utility functions for rolling operations"""
from collections import defaultdict
import warnings

import numpy as np

from pandas.core.dtypes.common import is_integer
from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries

import pandas.core.common as com
from pandas.core.generic import _shared_docs
from pandas.core.groupby.base import GroupByMixin
from pandas.core.index import MultiIndex

_shared_docs = dict(**_shared_docs)
_doc_template = """
Returns
-------
Series or DataFrame
Return type is determined by the caller.
See Also
--------
Series.%(name)s : Series %(name)s.
DataFrame.%(name)s : DataFrame %(name)s.
"""


class _GroupByMixin(GroupByMixin):
"""
Provide the groupby facilities.
"""

def __init__(self, obj, *args, **kwargs):
parent = kwargs.pop("parent", None) # noqa
groupby = kwargs.pop("groupby", None)
if groupby is None:
groupby, obj = obj, obj.obj
self._groupby = groupby
self._groupby.mutated = True
self._groupby.grouper.mutated = True
super().__init__(obj, *args, **kwargs)

count = GroupByMixin._dispatch("count")
corr = GroupByMixin._dispatch("corr", other=None, pairwise=None)
cov = GroupByMixin._dispatch("cov", other=None, pairwise=None)

def _apply(
self, func, name=None, window=None, center=None, check_minp=None, **kwargs
):
"""
Dispatch to apply; we are stripping all of the _apply kwargs and
performing the original function call on the grouped object.
"""

def f(x, name=name, *args):
x = self._shallow_copy(x)

if isinstance(name, str):
return getattr(x, name)(*args, **kwargs)

return x.apply(name, *args, **kwargs)

return self._groupby.apply(f)


def _flex_binary_moment(arg1, arg2, f, pairwise=False):

if not (
isinstance(arg1, (np.ndarray, ABCSeries, ABCDataFrame))
and isinstance(arg2, (np.ndarray, ABCSeries, ABCDataFrame))
):
raise TypeError(
"arguments to moment function must be of type "
"np.ndarray/Series/DataFrame"
)

if isinstance(arg1, (np.ndarray, ABCSeries)) and isinstance(
arg2, (np.ndarray, ABCSeries)
):
X, Y = _prep_binary(arg1, arg2)
return f(X, Y)

elif isinstance(arg1, ABCDataFrame):
from pandas import DataFrame

def dataframe_from_int_dict(data, frame_template):
result = DataFrame(data, index=frame_template.index)
if len(result.columns) > 0:
result.columns = frame_template.columns[result.columns]
return result

results = {}
if isinstance(arg2, ABCDataFrame):
if pairwise is False:
if arg1 is arg2:
# special case in order to handle duplicate column names
for i, col in enumerate(arg1.columns):
results[i] = f(arg1.iloc[:, i], arg2.iloc[:, i])
return dataframe_from_int_dict(results, arg1)
else:
if not arg1.columns.is_unique:
raise ValueError("'arg1' columns are not unique")
if not arg2.columns.is_unique:
raise ValueError("'arg2' columns are not unique")
with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", RuntimeWarning)
X, Y = arg1.align(arg2, join="outer")
X = X + 0 * Y
Y = Y + 0 * X

with warnings.catch_warnings(record=True):
warnings.simplefilter("ignore", RuntimeWarning)
res_columns = arg1.columns.union(arg2.columns)
for col in res_columns:
if col in X and col in Y:
results[col] = f(X[col], Y[col])
return DataFrame(results, index=X.index, columns=res_columns)
elif pairwise is True:
results = defaultdict(dict)
for i, k1 in enumerate(arg1.columns):
for j, k2 in enumerate(arg2.columns):
if j < i and arg2 is arg1:
# Symmetric case
results[i][j] = results[j][i]
else:
results[i][j] = f(
*_prep_binary(arg1.iloc[:, i], arg2.iloc[:, j])
)

from pandas import concat

result_index = arg1.index.union(arg2.index)
if len(result_index):

# construct result frame
result = concat(
[
concat(
[results[i][j] for j, c in enumerate(arg2.columns)],
ignore_index=True,
)
for i, c in enumerate(arg1.columns)
],
ignore_index=True,
axis=1,
)
result.columns = arg1.columns

# set the index and reorder
if arg2.columns.nlevels > 1:
result.index = MultiIndex.from_product(
arg2.columns.levels + [result_index]
)
result = result.reorder_levels([2, 0, 1]).sort_index()
else:
result.index = MultiIndex.from_product(
[range(len(arg2.columns)), range(len(result_index))]
)
result = result.swaplevel(1, 0).sort_index()
result.index = MultiIndex.from_product(
[result_index] + [arg2.columns]
)
else:

# empty result
result = DataFrame(
index=MultiIndex(
levels=[arg1.index, arg2.columns], codes=[[], []]
),
columns=arg2.columns,
dtype="float64",
)

# reset our index names to arg1 names
# reset our column names to arg2 names
# careful not to mutate the original names
result.columns = result.columns.set_names(arg1.columns.names)
result.index = result.index.set_names(
result_index.names + arg2.columns.names
)

return result

else:
raise ValueError("'pairwise' is not True/False")
else:
results = {
i: f(*_prep_binary(arg1.iloc[:, i], arg2))
for i, col in enumerate(arg1.columns)
}
return dataframe_from_int_dict(results, arg1)

else:
return _flex_binary_moment(arg2, arg1, f)


def _get_center_of_mass(comass, span, halflife, alpha):
valid_count = com.count_not_none(comass, span, halflife, alpha)
if valid_count > 1:
raise ValueError("comass, span, halflife, and alpha are mutually exclusive")

# Convert to center of mass; domain checks ensure 0 < alpha <= 1
if comass is not None:
if comass < 0:
raise ValueError("comass must satisfy: comass >= 0")
elif span is not None:
if span < 1:
raise ValueError("span must satisfy: span >= 1")
comass = (span - 1) / 2.0
elif halflife is not None:
if halflife <= 0:
raise ValueError("halflife must satisfy: halflife > 0")
decay = 1 - np.exp(np.log(0.5) / halflife)
comass = 1 / decay - 1
elif alpha is not None:
if alpha <= 0 or alpha > 1:
raise ValueError("alpha must satisfy: 0 < alpha <= 1")
comass = (1.0 - alpha) / alpha
else:
raise ValueError("Must pass one of comass, span, halflife, or alpha")

return float(comass)
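
# Illustrative note (not in the pandas source): the alternative EWM
# parameterizations above all reduce to a single center of mass, e.g.
#   span=9    -> comass = (9 - 1) / 2            = 4.0
#   alpha=0.2 -> comass = (1 - 0.2) / 0.2        = 4.0
#   halflife  -> comass = 1 / (1 - exp(log(0.5) / halflife)) - 1
# so ewm(span=9) and ewm(alpha=0.2) describe the same exponential weighting.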


def _offset(window, center):
if not is_integer(window):
window = len(window)
offset = (window - 1) / 2.0 if center else 0
try:
return int(offset)
except TypeError:
return offset.astype(int)


def _require_min_periods(p):
def _check_func(minp, window):
if minp is None:
return window
else:
return max(p, minp)

return _check_func


def _use_window(minp, window):
if minp is None:
return window
else:
return minp


def _zsqrt(x):
with np.errstate(all="ignore"):
result = np.sqrt(x)
mask = x < 0

if isinstance(x, ABCDataFrame):
if mask.values.any():
result[mask] = 0
else:
if mask.any():
result[mask] = 0

return result


def _prep_binary(arg1, arg2):
if not isinstance(arg2, type(arg1)):
raise Exception("Input arrays must be of the same type!")

# mask out values, this also makes a common index...
X = arg1 + 0 * arg2
Y = arg2 + 0 * arg1

return X, Y
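``_prep_binary`` aligns two Series onto a common index by adding ``0 * other``, which also propagates NaN wherever either input is missing. A standalone sketch of that trick (illustrative, independent of the pandas internals above):

import pandas as pd

a = pd.Series([1.0, 2.0, 3.0], index=["x", "y", "z"])
b = pd.Series([10.0, 20.0], index=["y", "z"])

# Adding 0 * the other operand aligns both Series on the union of their
# indexes and leaves NaN where either side has no value.
X = a + 0 * b   # x -> NaN, y -> 2.0,  z -> 3.0
Y = b + 0 * a   # x -> NaN, y -> 10.0, z -> 20.0
print(X)
print(Y)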