From e2d4da441609e5e82390f6cb860aa73d6a8fa6f0 Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 31 Oct 2016 23:21:31 +0000 Subject: [PATCH 1/2] Small fixes and optimizations for datashader operations --- holoviews/operation/datashader.py | 64 ++++++++++++++++++++++--------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index 79f9f3c8cc..9dd2b48151 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -22,13 +22,20 @@ from ..streams import RangeXY +def is_df(obj): + """ + Check if the Element is a Dataset type wrapping DataFrame-like data. + """ + return isinstance(obj, Dataset) and obj.interface is PandasInterface + + @dispatch(Element) def discover(dataset): """ Allows datashader to correctly discover the dtypes of the data in a holoviews Element. """ - if isinstance(dataset.interface, (PandasInterface, ArrayInterface)): + if dataset.interface in [PandasInterface, ArrayInterface]: return dsdiscover(dataset.data) else: return dsdiscover(dataset.dframe()) @@ -54,21 +61,11 @@ def dataset_pipeline(dataset, schema, canvas, glyph, summary): vdims = [dataset.get_dimension(column)(name) if column else Dimension('Count')] - agg = pandas_pipeline(dataset.dframe(), schema, canvas, - glyph, summary) + agg = pandas_pipeline(dataset.data, schema, canvas, + glyph, summary) agg = agg.rename({'x_axis': kdims[0].name, 'y_axis': kdims[1].name}) - - params = dict(get_param_values(dataset), kdims=kdims, - datatype=['xarray'], vdims=vdims) - - if agg.ndim == 2: - return GridImage(agg, **params) - else: - return NdOverlay({c: GridImage(agg.sel(**{column: c}), - **params) - for c in agg.coords[column].data}, - kdims=[dataset.get_dimension(column)]) + return agg class aggregate(ElementOperation): @@ -120,6 +117,11 @@ class aggregate(ElementOperation): List of streams that are applied if dynamic=True, allowing for dynamic interaction with the plot.""") + element_type = param.ClassSelector(class_=(Dataset,), instantiate=False, + is_instance=False, default=GridImage, + doc=""" + The type of the returned Elements, must be a 2D Dataset type.""") + @classmethod def get_agg_data(cls, obj, category=None): """ @@ -140,7 +142,7 @@ def get_agg_data(cls, obj, category=None): elif isinstance(obj, CompositeOverlay): for key, el in obj.data.items(): x, y, element, glyph = cls.get_agg_data(el) - df = element.dframe() + df = element.data if is_df(element) else element.dframe() if isinstance(obj, NdOverlay): df = df.assign(**dict(zip(obj.dimensions('key', True), key))) paths.append(df) @@ -148,12 +150,15 @@ def get_agg_data(cls, obj, category=None): vdims = element.vdims elif isinstance(obj, Element): glyph = 'line' if isinstance(obj, Curve) else 'points' - paths.append(obj.dframe()) + paths.append(obj.data if is_df(obj) else obj.dframe()) if glyph == 'line': empty = paths[0][:1].copy() empty.loc[0, :] = (np.NaN,) * empty.shape[1] paths = [elem for path in paths for elem in (path, empty)][:-1] - df = pd.concat(paths).reset_index(drop=True) + if len(paths) > 1: + df = pd.concat(paths).reset_index(drop=True) + else: + df = paths[0] if category and df[category].dtype.name != 'category': df[category] = df[category].astype('category') return x, y, Dataset(df, kdims=kdims, vdims=vdims), glyph @@ -178,7 +183,26 @@ def _process(self, element, key=None): cvs = ds.Canvas(plot_width=width, plot_height=height, x_range=(xstart, xend), y_range=(ystart, yend)) - return getattr(cvs, glyph)(data, x, y, self.p.aggregator) + + column = agg_fn.column + if column and isinstance(agg_fn, ds.count_cat): + name = '%s Count' % agg_fn.column + else: + name = column + vdims = [element.get_dimension(column)(name) if column + else Dimension('Count')] + params = dict(get_param_values(element), kdims=[element.dimensions()[0:2]], + datatype=['xarray'], vdims=vdims) + + agg = getattr(cvs, glyph)(data, x, y, self.p.aggregator) + if agg.ndim == 2: + return self.p.element_type(agg, **params) + else: + return NdOverlay({c: self.p.element_type(agg.sel(**{column: c}), + **params) + for c in agg.coords[column].data}, + kdims=[data.get_dimension(column)]) + @@ -195,7 +219,7 @@ class shade(ElementOperation): Iterable or a Callable. """ - cmap = param.ClassSelector(class_=(Iterable, Callable), doc=""" + cmap = param.ClassSelector(class_=(Iterable, Callable, dict), doc=""" Iterable or callable which returns colors as hex colors. Callable type must allow mapping colors between 0 and 1.""") @@ -259,6 +283,8 @@ def _process(self, element, key=None): categories = array.shape[-1] if not self.p.cmap: pass + elif isinstance(self.p.cmap, dict): + shade_opts['color_key'] = self.p.cmap elif isinstance(self.p.cmap, Iterable): shade_opts['color_key'] = [c for i, c in zip(range(categories), self.p.cmap)] From 9dac3fabcce5bcb6efc715f7fb07e84e8baeffcd Mon Sep 17 00:00:00 2001 From: Philipp Rudiger Date: Mon, 31 Oct 2016 23:44:38 +0000 Subject: [PATCH 2/2] Minor refactoring of datashader operations --- holoviews/operation/datashader.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/holoviews/operation/datashader.py b/holoviews/operation/datashader.py index 9dd2b48151..7453dd87a0 100644 --- a/holoviews/operation/datashader.py +++ b/holoviews/operation/datashader.py @@ -22,13 +22,6 @@ from ..streams import RangeXY -def is_df(obj): - """ - Check if the Element is a Dataset type wrapping DataFrame-like data. - """ - return isinstance(obj, Dataset) and obj.interface is PandasInterface - - @dispatch(Element) def discover(dataset): """ @@ -132,6 +125,7 @@ def get_agg_data(cls, obj, category=None): kdims = obj.kdims vdims = obj.vdims x, y = obj.dimensions(label=True)[:2] + is_df = lambda x: isinstance(x, Dataset) and x.interface is PandasInterface if isinstance(obj, Path): glyph = 'line' for p in obj.data: