Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

REF: rolling benchmarks to reduce redundant benchmarks #44475

Merged
merged 1 commit on Nov 15, 2021
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
208 changes: 113 additions & 95 deletions asv_bench/benchmarks/rolling.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,22 +9,24 @@ class Methods:

params = (
["DataFrame", "Series"],
[10, 1000],
[("rolling", {"window": 10}), ("rolling", {"window": 1000}), ("expanding", {})],
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
)
param_names = ["constructor", "window", "dtype", "method"]
param_names = ["constructor", "window_kwargs", "dtype", "method"]

def setup(self, constructor, window, dtype, method):
def setup(self, constructor, window_kwargs, dtype, method):
N = 10 ** 5
window, kwargs = window_kwargs
arr = (100 * np.random.random(N)).astype(dtype)
self.roll = getattr(pd, constructor)(arr).rolling(window)
obj = getattr(pd, constructor)(arr)
self.window = getattr(obj, window)(**kwargs)

def time_rolling(self, constructor, window, dtype, method):
getattr(self.roll, method)()
def time_method(self, constructor, window_kwargs, dtype, method):
getattr(self.window, method)()

def peakmem_rolling(self, constructor, window, dtype, method):
getattr(self.roll, method)()
def peakmem_method(self, constructor, window_kwargs, dtype, method):
getattr(self.window, method)()


class Apply:
Expand All @@ -46,148 +48,160 @@ def time_rolling(self, constructor, window, dtype, function, raw):
self.roll.apply(function, raw=raw)


class NumbaEngine:
class NumbaEngineMethods:
params = (
["DataFrame", "Series"],
["int", "float"],
[np.sum, lambda x: np.sum(x) + 5],
[("rolling", {"window": 10}), ("expanding", {})],
["sum", "max", "min", "median", "mean"],
[True, False],
[None, 100],
)
param_names = ["constructor", "dtype", "function", "method", "parallel", "cols"]
param_names = [
"constructor",
"dtype",
"window_kwargs",
"method",
"parallel",
"cols",
]

def setup(self, constructor, dtype, function, method, parallel, cols):
def setup(self, constructor, dtype, window_kwargs, method, parallel, cols):
N = 10 ** 3
window, kwargs = window_kwargs
shape = (N, cols) if cols is not None and constructor != "Series" else N
arr = (100 * np.random.random(shape)).astype(dtype)
data = getattr(pd, constructor)(arr)

# Warm the cache
with warnings.catch_warnings(record=True):
# Catch parallel=True not being applicable e.g. 1D data
self.roll = data.rolling(10)
self.roll.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)
getattr(self.roll, method)(
self.window = getattr(data, window)(**kwargs)
getattr(self.window, method)(
engine="numba", engine_kwargs={"parallel": parallel}
)

self.expand = data.expanding()
self.expand.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)

def time_rolling_apply(self, constructor, dtype, function, method, parallel, col):
with warnings.catch_warnings(record=True):
self.roll.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)

def time_expanding_apply(self, constructor, dtype, function, method, parallel, col):
with warnings.catch_warnings(record=True):
self.expand.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)

def time_rolling_methods(self, constructor, dtype, function, method, parallel, col):
def test_method(self, constructor, dtype, window_kwargs, method, parallel, cols):
with warnings.catch_warnings(record=True):
getattr(self.roll, method)(
getattr(self.window, method)(
engine="numba", engine_kwargs={"parallel": parallel}
)


class ExpandingMethods:

class NumbaEngineApply:
params = (
["DataFrame", "Series"],
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
[("rolling", {"window": 10}), ("expanding", {})],
[np.sum, lambda x: np.sum(x) + 5],
[True, False],
[None, 100],
)
param_names = ["constructor", "window", "dtype", "method"]
param_names = [
"constructor",
"dtype",
"window_kwargs",
"function",
"parallel",
"cols",
]

def setup(self, constructor, dtype, method):
N = 10 ** 5
N_groupby = 100
arr = (100 * np.random.random(N)).astype(dtype)
self.expanding = getattr(pd, constructor)(arr).expanding()
self.expanding_groupby = (
pd.DataFrame({"A": arr[:N_groupby], "B": range(N_groupby)})
.groupby("B")
.expanding()
)
def setup(self, constructor, dtype, window_kwargs, function, parallel, cols):
N = 10 ** 3
window, kwargs = window_kwargs
shape = (N, cols) if cols is not None and constructor != "Series" else N
arr = (100 * np.random.random(shape)).astype(dtype)
data = getattr(pd, constructor)(arr)

def time_expanding(self, constructor, dtype, method):
getattr(self.expanding, method)()
# Warm the cache
with warnings.catch_warnings(record=True):
# Catch parallel=True not being applicable e.g. 1D data
self.window = getattr(data, window)(**kwargs)
self.window.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)

def time_expanding_groupby(self, constructor, dtype, method):
getattr(self.expanding_groupby, method)()
def test_method(self, constructor, dtype, window_kwargs, function, parallel, cols):
with warnings.catch_warnings(record=True):
self.window.apply(
function, raw=True, engine="numba", engine_kwargs={"parallel": parallel}
)


class EWMMethods:

params = (["DataFrame", "Series"], [10, 1000], ["int", "float"], ["mean", "std"])
param_names = ["constructor", "window", "dtype", "method"]
params = (
["DataFrame", "Series"],
[
({"halflife": 10}, "mean"),
({"halflife": 10}, "std"),
({"halflife": 1000}, "mean"),
({"halflife": 1000}, "std"),
(
{
"halflife": "1 Day",
"times": pd.date_range("1900", periods=10 ** 5, freq="23s"),
},
"mean",
),
],
["int", "float"],
)
param_names = ["constructor", "kwargs_method", "dtype"]

def setup(self, constructor, window, dtype, method):
def setup(self, constructor, kwargs_method, dtype):
N = 10 ** 5
kwargs, method = kwargs_method
arr = (100 * np.random.random(N)).astype(dtype)
times = pd.date_range("1900", periods=N, freq="23s")
self.ewm = getattr(pd, constructor)(arr).ewm(halflife=window)
self.ewm_times = getattr(pd, constructor)(arr).ewm(
halflife="1 Day", times=times
)
self.method = method
self.ewm = getattr(pd, constructor)(arr).ewm(**kwargs)

def time_ewm(self, constructor, window, dtype, method):
getattr(self.ewm, method)()

def time_ewm_times(self, constructor, window, dtype, method):
self.ewm_times.mean()
def time_ewm(self, constructor, kwargs_method, dtype):
getattr(self.ewm, self.method)()


class VariableWindowMethods(Methods):
params = (
["DataFrame", "Series"],
["50s", "1h", "1d"],
["int", "float"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum"],
["median", "mean", "max", "min", "std", "count", "skew", "kurt", "sum", "sem"],
)
param_names = ["constructor", "window", "dtype", "method"]

def setup(self, constructor, window, dtype, method):
N = 10 ** 5
arr = (100 * np.random.random(N)).astype(dtype)
index = pd.date_range("2017-01-01", periods=N, freq="5s")
self.roll = getattr(pd, constructor)(arr, index=index).rolling(window)
self.window = getattr(pd, constructor)(arr, index=index).rolling(window)


class Pairwise:

params = ([10, 1000, None], ["corr", "cov"], [True, False])
param_names = ["window", "method", "pairwise"]
params = (
[({"window": 10}, "rolling"), ({"window": 1000}, "rolling"), ({}, "expanding")],
["corr", "cov"],
[True, False],
)
param_names = ["window_kwargs", "method", "pairwise"]

def setup(self, window, method, pairwise):
def setup(self, kwargs_window, method, pairwise):
N = 10 ** 4
n_groups = 20
kwargs, window = kwargs_window
groups = [i for _ in range(N // n_groups) for i in range(n_groups)]
arr = np.random.random(N)
self.df = pd.DataFrame(arr)
self.df_group = pd.DataFrame({"A": groups, "B": arr}).groupby("A")
self.window = getattr(self.df, window)(**kwargs)
self.window_group = getattr(
pd.DataFrame({"A": groups, "B": arr}).groupby("A"), window
)(**kwargs)

def time_pairwise(self, window, method, pairwise):
if window is None:
r = self.df.expanding()
else:
r = self.df.rolling(window=window)
getattr(r, method)(self.df, pairwise=pairwise)
def time_pairwise(self, kwargs_window, method, pairwise):
getattr(self.window, method)(self.df, pairwise=pairwise)

def time_groupby(self, window, method, pairwise):
if window is None:
r = self.df_group.expanding()
else:
r = self.df_group.rolling(window=window)
getattr(r, method)(self.df, pairwise=pairwise)
def time_groupby(self, kwargs_window, method, pairwise):
getattr(self.window_group, method)(self.df, pairwise=pairwise)


class Quantile:
Expand Down Expand Up @@ -274,25 +288,29 @@ def peakmem_rolling(self, constructor, window_size, dtype, method):

class Groupby:

params = ["sum", "median", "mean", "max", "min", "kurt", "sum"]
params = (
["sum", "median", "mean", "max", "min", "kurt", "sum"],
[
("rolling", {"window": 2}),
("rolling", {"window": "30s", "on": "C"}),
("expanding", {}),
],
)

def setup(self, method):
def setup(self, method, window_kwargs):
N = 1000
window, kwargs = window_kwargs
df = pd.DataFrame(
{
"A": [str(i) for i in range(N)] * 10,
"B": list(range(N)) * 10,
"C": pd.date_range(start="1900-01-01", freq="1min", periods=N * 10),
}
)
self.groupby_roll_int = df.groupby("A").rolling(window=2)
self.groupby_roll_offset = df.groupby("A").rolling(window="30s", on="C")

def time_rolling_int(self, method):
getattr(self.groupby_roll_int, method)()
self.groupby_window = getattr(df.groupby("A"), window)(**kwargs)

def time_rolling_offset(self, method):
getattr(self.groupby_roll_offset, method)()
def time_method(self, method, window_kwargs):
getattr(self.groupby_window, method)()


class GroupbyLargeGroups:
Expand Down