Skip to content

Commit

Permalink
[skip-ci] Add benchmarks for groupby math (#6390)
Browse files Browse the repository at this point in the history
  • Loading branch information
dcherian authored Mar 21, 2022
1 parent 83f238a commit 511d36c
Showing 1 changed file with 44 additions and 0 deletions.
44 changes: 44 additions & 0 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def setup(self, *args, **kwargs):
}
)
self.ds2d = self.ds1d.expand_dims(z=10)
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds2d_mean = self.ds2d.groupby("b").mean()

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
Expand All @@ -31,6 +33,18 @@ def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)()

def time_groupby_binary_op_1d(self):
    """Evaluate ``self.ds1d - self.ds1d_mean`` and discard the result.

    ``self.ds1d_mean`` is the per-group mean computed in ``setup`` via
    ``self.ds1d.groupby("b").mean()``.
    """
    # NOTE(review): the means are subtracted from the raw object, not from
    # ``self.ds1d.groupby("b")`` -- confirm that this (rather than groupby
    # arithmetic) is what the benchmark is meant to exercise.
    data, group_mean = self.ds1d, self.ds1d_mean
    data - group_mean

def time_groupby_binary_op_2d(self):
    """Evaluate ``self.ds2d - self.ds2d_mean`` and discard the result.

    ``self.ds2d_mean`` is the per-group mean computed in ``setup`` via
    ``self.ds2d.groupby("b").mean()``.
    """
    # NOTE(review): the means are subtracted from the raw object, not from
    # ``self.ds2d.groupby("b")`` -- confirm that this (rather than groupby
    # arithmetic) is what the benchmark is meant to exercise.
    data, group_mean = self.ds2d, self.ds2d_mean
    data - group_mean

def peakmem_groupby_binary_op_1d(self):
    """Evaluate ``self.ds1d - self.ds1d_mean`` and discard the result.

    Same expression as ``time_groupby_binary_op_1d``, exposed under a
    ``peakmem_`` name.
    """
    # NOTE(review): the means are subtracted from the raw object, not from
    # ``self.ds1d.groupby("b")`` -- confirm that this (rather than groupby
    # arithmetic) is what the benchmark is meant to exercise.
    data, group_mean = self.ds1d, self.ds1d_mean
    data - group_mean

def peakmem_groupby_binary_op_2d(self):
    """Evaluate ``self.ds2d - self.ds2d_mean`` and discard the result.

    Same expression as ``time_groupby_binary_op_2d``, exposed under a
    ``peakmem_`` name.
    """
    # NOTE(review): the means are subtracted from the raw object, not from
    # ``self.ds2d.groupby("b")`` -- confirm that this (rather than groupby
    # arithmetic) is what the benchmark is meant to exercise.
    data, group_mean = self.ds2d, self.ds2d_mean
    data - group_mean


class GroupByDask(GroupBy):
def setup(self, *args, **kwargs):
Expand All @@ -40,6 +54,8 @@ def setup(self, *args, **kwargs):
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2)).chunk(
{"dim_0": 50, "z": 5}
)
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds2d_mean = self.ds2d.groupby("b").mean()


class GroupByPandasDataFrame(GroupBy):
Expand All @@ -51,6 +67,13 @@ def setup(self, *args, **kwargs):

super().setup(**kwargs)
self.ds1d = self.ds1d.to_dataframe()
self.ds1d_mean = self.ds1d.groupby("b").mean()

def time_groupby_binary_op_2d(self):
    """Override the inherited 2-D benchmark; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    # NOTE(review): presumably this is meant to opt the pandas DataFrame
    # variant out of the 2-D case -- confirm how the benchmark runner
    # reports an exception raised from the benchmark body itself.
    raise NotImplementedError()

def peakmem_groupby_binary_op_2d(self):
    """Override the inherited 2-D ``peakmem_`` benchmark; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    # NOTE(review): presumably this is meant to opt the pandas DataFrame
    # variant out of the 2-D case -- confirm how the benchmark runner
    # reports an exception raised from the benchmark body itself.
    raise NotImplementedError()


class GroupByDaskDataFrame(GroupBy):
Expand All @@ -63,6 +86,13 @@ def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
self.ds1d_mean = self.ds1d.groupby("b").mean()

def time_groupby_binary_op_2d(self):
    """Override the inherited 2-D benchmark; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    # NOTE(review): presumably this is meant to opt the dask DataFrame
    # variant out of the 2-D case -- confirm how the benchmark runner
    # reports an exception raised from the benchmark body itself.
    raise NotImplementedError()

def peakmem_groupby_binary_op_2d(self):
    """Override the inherited 2-D ``peakmem_`` benchmark; always raises.

    Raises
    ------
    NotImplementedError
        Unconditionally.
    """
    # NOTE(review): presumably this is meant to opt the dask DataFrame
    # variant out of the 2-D case -- confirm how the benchmark runner
    # reports an exception raised from the benchmark body itself.
    raise NotImplementedError()


class Resample:
Expand All @@ -74,6 +104,8 @@ def setup(self, *args, **kwargs):
coords={"time": pd.date_range("2001-01-01", freq="H", periods=365 * 24)},
)
self.ds2d = self.ds1d.expand_dims(z=10)
self.ds1d_mean = self.ds1d.resample(time="48H").mean()
self.ds2d_mean = self.ds2d.resample(time="48H").mean()

@parameterized(["ndim"], [(1, 2)])
def time_init(self, ndim):
Expand All @@ -89,6 +121,18 @@ def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="48H"), method)()

def time_groupby_binary_op_1d(self):
    """Evaluate ``self.ds1d - self.ds1d_mean`` and discard the result.

    ``self.ds1d_mean`` is the resampled mean computed in ``setup`` via
    ``self.ds1d.resample(time="48H").mean()``.
    """
    # NOTE(review): the means are subtracted from the raw object, not from a
    # resample object -- confirm that this is what the benchmark is meant to
    # exercise.
    series, resampled_mean = self.ds1d, self.ds1d_mean
    series - resampled_mean

def time_groupby_binary_op_2d(self):
    """Evaluate ``self.ds2d - self.ds2d_mean`` and discard the result.

    ``self.ds2d_mean`` is the resampled mean computed in ``setup`` via
    ``self.ds2d.resample(time="48H").mean()``.
    """
    # NOTE(review): the means are subtracted from the raw object, not from a
    # resample object -- confirm that this is what the benchmark is meant to
    # exercise.
    series, resampled_mean = self.ds2d, self.ds2d_mean
    series - resampled_mean

def peakmem_groupby_binary_op_1d(self):
    """Evaluate ``self.ds1d - self.ds1d_mean`` and discard the result.

    Same expression as ``time_groupby_binary_op_1d``, exposed under a
    ``peakmem_`` name.
    """
    # NOTE(review): the means are subtracted from the raw object, not from a
    # resample object -- confirm that this is what the benchmark is meant to
    # exercise.
    series, resampled_mean = self.ds1d, self.ds1d_mean
    series - resampled_mean

def peakmem_groupby_binary_op_2d(self):
    """Evaluate ``self.ds2d - self.ds2d_mean`` and discard the result.

    Same expression as ``time_groupby_binary_op_2d``, exposed under a
    ``peakmem_`` name.
    """
    # NOTE(review): the means are subtracted from the raw object, not from a
    # resample object -- confirm that this is what the benchmark is meant to
    # exercise.
    series, resampled_mean = self.ds2d, self.ds2d_mean
    series - resampled_mean


class ResampleDask(Resample):
def setup(self, *args, **kwargs):
Expand Down

0 comments on commit 511d36c

Please sign in to comment.