Skip to content

Commit

Permalink
[skip-ci] Add compute to groupby benchmarks (#7690)
Browse files (browse the repository at this point in the history)
* [skip-ci] Add compute to groupby benchmarks

* [skip-ci] Update asv_bench/benchmarks/groupby.py
  • Loading branch information
dcherian authored Mar 29, 2023
1 parent 0159e45 commit 4fc8445
Showing 1 changed file with 13 additions and 11 deletions.
24 changes: 13 additions & 11 deletions asv_bench/benchmarks/groupby.py
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,5 @@
# import flox to avoid the cost of first import
import flox.xarray # noqa
import numpy as np
import pandas as pd

@@ -27,24 +29,24 @@ def time_init(self, ndim):
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("a"), method)()
getattr(ds.groupby("a"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.groupby("b"), method)()
getattr(ds.groupby("b"), method)().compute()

def time_binary_op_1d(self):
self.ds1d.groupby("b") - self.ds1d_mean
(self.ds1d.groupby("b") - self.ds1d_mean).compute()

def time_binary_op_2d(self):
self.ds2d.groupby("b") - self.ds2d_mean
(self.ds2d.groupby("b") - self.ds2d_mean).compute()

def peakmem_binary_op_1d(self):
self.ds1d.groupby("b") - self.ds1d_mean
(self.ds1d.groupby("b") - self.ds1d_mean).compute()

def peakmem_binary_op_2d(self):
self.ds2d.groupby("b") - self.ds2d_mean
(self.ds2d.groupby("b") - self.ds2d_mean).compute()


class GroupByDask(GroupBy):
@@ -56,8 +58,8 @@ def setup(self, *args, **kwargs):
self.ds1d["c"] = self.ds1d["c"].chunk({"dim_0": 50})
self.ds2d = self.ds2d.sel(dim_0=slice(None, None, 2))
self.ds2d["c"] = self.ds2d["c"].chunk({"dim_0": 50, "z": 5})
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds2d_mean = self.ds2d.groupby("b").mean()
self.ds1d_mean = self.ds1d.groupby("b").mean().compute()
self.ds2d_mean = self.ds2d.groupby("b").mean().compute()


class GroupByPandasDataFrame(GroupBy):
@@ -88,7 +90,7 @@ def setup(self, *args, **kwargs):
requires_dask()
super().setup(**kwargs)
self.ds1d = self.ds1d.chunk({"dim_0": 50}).to_dataframe()
self.ds1d_mean = self.ds1d.groupby("b").mean()
self.ds1d_mean = self.ds1d.groupby("b").mean().compute()

def time_binary_op_2d(self):
raise NotImplementedError
@@ -116,12 +118,12 @@ def time_init(self, ndim):
@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_small_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="3M"), method)()
getattr(ds.resample(time="3M"), method)().compute()

@parameterized(["method", "ndim"], [("sum", "mean"), (1, 2)])
def time_agg_large_num_groups(self, method, ndim):
ds = getattr(self, f"ds{ndim}d")
getattr(ds.resample(time="48H"), method)()
getattr(ds.resample(time="48H"), method)().compute()


class ResampleDask(Resample):
Expand Down

0 comments on commit 4fc8445

Please sign in to comment.