diff --git a/hvplot/converter.py b/hvplot/converter.py index 25c624b6f..255b14060 100644 --- a/hvplot/converter.py +++ b/hvplot/converter.py @@ -64,6 +64,10 @@ process_crs, process_intake, process_xarray, + relabel, + relabel_redim, + redim_, + support_index, check_library, is_geodataframe, process_derived_datetime_xarray, @@ -928,7 +932,11 @@ def _process_symmetric(self, symmetric, clim, check_symmetric_max): return False elif self._color_dim: - data = self.data[self._color_dim] + data = ( + self.data[self._color_dim] + if self._color_dim in self.data.columns + else self.data.index.get_level_values(self._color_dim) + ) else: return @@ -1181,7 +1189,7 @@ def _process_data( if gridded_data: not_found = [g for g in groupby if g not in data.coords] - not_found, _, _ = process_derived_datetime_xarray(data, not_found) + post_not_found, _, _ = process_derived_datetime_xarray(data, not_found) data_vars = list(data.data_vars) if isinstance(data, xr.Dataset) else [data.name] indexes = list(data.coords.indexes) # Handle undeclared indexes @@ -1198,7 +1206,8 @@ def _process_data( if coord not in groupby + by: groupby.append(data_dim) self.variables = list(data.coords) + data_vars - if groupby and not_found: + self.variables.extend([item for item in not_found if item not in post_not_found]) + if groupby and post_not_found: raise ValueError( - f'The supplied groupby dimension(s) {not_found} ' + f'The supplied groupby dimension(s) {post_not_found} ' 'could not be found, expected one or ' @@ -1228,6 +1237,9 @@ def _process_data( and y is None and not by ): + # Broken, see https://github.com/holoviz/hvplot/issues/1364. + # Dask reset_index doesn't accept a level, so this would need to + # be adapted for Dask. 
self.data = data.stack().reset_index(1).rename(columns={'level_1': group_label}) by = group_label x = 'index' @@ -1254,9 +1266,8 @@ def _process_data( self.variables = indexes + list(self.data.columns) # Reset groupby dimensions - groupby_index = [g for g in groupby if g in indexes] - if groupby_index: - self.data = self.data.reset_index(groupby_index) + if not support_index(self.data) and any(g in indexes for g in groupby): + self.data = self.data.reset_index() if isinstance(by, (np.ndarray, pd.Series)): by_cols = [] @@ -1265,13 +1276,16 @@ def _process_data( not_found = [ g for g in groupby + by_cols if g not in list(self.data.columns) + indexes ] - not_found, self.data = process_derived_datetime_pandas(self.data, not_found, indexes) - if groupby and not_found: + post_not_found, self.data = process_derived_datetime_pandas( + self.data, not_found, indexes + ) + if groupby and post_not_found: raise ValueError( - f'The supplied groupby dimension(s) {not_found} ' + f'The supplied groupby dimension(s) {post_not_found} ' 'could not be found, expected one or ' f'more of: {list(self.data.columns)}' ) + self.variables.extend([item for item in not_found if item not in post_not_found]) if transforms: self.data = Dataset(self.data, indexes).transform(**transforms).data @@ -1545,7 +1559,11 @@ def __call__(self, kind, x, y): if self.streaming: raise NotImplementedError('Streaming and groupby not yet implemented') data = self.data - if not self.gridded and any(g in self.indexes for g in groups): + if ( + not support_index(data) + and not self.gridded + and any(g in self.indexes for g in groups) + ): data = data.reset_index() if self.datatype in ('geopandas', 'spatialpandas'): @@ -1568,8 +1586,8 @@ def __call__(self, kind, x, y): name = data.name or self.label or self.value_label dataset = Dataset(data, self.indexes, name) else: - dataset = Dataset(data) - dataset = dataset.redim(**self._redim) + dataset = Dataset(data, self.variables) + dataset = redim_(dataset, **self._redim) if groups: datasets = dataset.groupby(groups, 
dynamic=self.dynamic) @@ -1660,7 +1678,7 @@ def method_wrapper(ds, x, y): dataset = Dataset(data, self.indexes) except Exception: dataset = Dataset(data) - dataset = dataset.redim(**self._redim) + dataset = redim_(dataset, **self._redim) obj = method(x, y) obj._dataset = dataset @@ -1996,25 +2014,24 @@ def single_chart(self, element, x, y, data=None): if self.by: if element is Bars and not self.subplots: - if any(y in self.indexes for y in ys): + if not support_index(data) and any(y in self.indexes for y in ys): data = data.reset_index() return ( - element(data, ([x] if x else []) + self.by, ys) - .relabel(**self._relabel) - .redim(**self._redim) + relabel_redim( + element(data, ([x] if x else []) + self.by, ys), self._relabel, self._redim + ) .opts(cur_opts, backend='bokeh') .opts(compat_opts, backend=self._backend_compat) ) - chart = ( - Dataset(data, self.by + kdims, vdims) - .to(element, kdims, vdims, self.by) - .relabel(**self._relabel) + chart = relabel( + Dataset(data, self.by + kdims, vdims).to(element, kdims, vdims, self.by), + **self._relabel, ) chart = chart.layout() if self.subplots else chart.overlay(sort=False) else: - chart = element(data, kdims, vdims).relabel(**self._relabel) + chart = relabel(element(data, kdims, vdims), **self._relabel) return ( - chart.redim(**self._redim) + redim_(chart, **self._redim) .opts(cur_opts, backend='bokeh') .opts(compat_opts, backend=self._backend_compat) ) @@ -2078,8 +2095,10 @@ def _process_chart_args(self, data, x, y, single_y=False, categories=None): data = data.sort_values(x) # set index to column if needed in hover_cols - if self.use_index and any( - c for c in self.hover_cols if c in self.indexes and c not in data.columns + if ( + not support_index(data) + and self.use_index + and any(c for c in self.hover_cols if c in self.indexes and c not in data.columns) ): data = data.reset_index() @@ -2090,7 +2109,8 @@ def _process_chart_args(self, data, x, y, single_y=False, categories=None): dimensions.extend(col if 
isinstance(col, list) else [col]) not_found = [dim for dim in dimensions if dim not in self.variables] - _, data = process_derived_datetime_pandas(data, not_found, self.indexes) + post_not_found, data = process_derived_datetime_pandas(data, not_found, self.indexes) + self.variables.extend([item for item in not_found if item not in post_not_found]) return data, x, y @@ -2128,9 +2148,11 @@ def chart(self, element, x, y, data=None): charts = [] for c in y: - kdims, vdims = self._get_dimensions([x], [c]) - chart = element(data, kdims, vdims).redim(**{c: self.value_label}) - charts.append((c, chart.relabel(**self._relabel).redim(**self._redim))) + ydim = hv.Dimension(c, label=self.value_label) + kdims, vdims = self._get_dimensions([x], [ydim]) + chart = element(data, kdims, vdims) + chart = relabel_redim(chart, self._relabel, self._redim) + charts.append((c, chart)) return ( self._by_type(charts, self.group_label, sort=False) .opts(cur_opts, backend='bokeh') @@ -2164,7 +2186,7 @@ def errorbars(self, x=None, y=None, data=None): # Categorical charts # ########################## - def _category_plot(self, element, x, y, data): + def _category_plot(self, element, x: str, y: list[str], data): """ Helper method to generate element from indexed dataframe. """ @@ -2181,6 +2203,8 @@ def _category_plot(self, element, x, y, data): id_vars = [x] if any(v in self.indexes for v in id_vars): + # Calling reset_index() is required since id_vars from melt + # only accepts column names, not index names. 
data = data.reset_index() data = data[y + [x]] @@ -2196,10 +2220,8 @@ def _category_plot(self, element, x, y, data): obj = Dataset(df, kdims, vdims).to(element, x).layout() else: obj = element(df, kdims, vdims) - return ( - obj.redim(**self._redim) - .relabel(**self._relabel) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) + return relabel_redim(obj, self._relabel, self._redim).apply( + self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) def bar(self, x=None, y=None, data=None): @@ -2231,12 +2253,10 @@ def _stats_plot(self, element, y, data=None): ylim = self._plot_opts.get('ylim', (None, None)) if not isinstance(y, (list, tuple)): ranges = {y: ylim} - return ( - element(data, self.by, y) - .redim.range(**ranges) - .relabel(**self._relabel) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) - ) + return relabel( + element(data, self.by, y).redim.range(**ranges), + **self._relabel, + ).apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) labelled = ['y' if self.invert else 'x'] if self.group_label != 'Group' else [] if self.value_label != 'value': @@ -2259,16 +2279,15 @@ def _stats_plot(self, element, y, data=None): if list(y) and df[self.value_label].dtype is not data[y[0]].dtype: df[self.value_label] = df[self.value_label].astype(data[y[0]].dtype) redim = self._merge_redim({self.value_label: ylim}) - return ( - element(df, kdims, self.value_label) - .redim(**redim) - .relabel(**self._relabel) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) - ) + return relabel_redim( + element(df, kdims, self.value_label), + self._relabel, + redim, + ).apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) def box(self, x=None, y=None, data=None): self._error_if_unavailable('box') - return self._stats_plot(BoxWhisker, y, data).redim(**self._redim) + return redim_(self._stats_plot(BoxWhisker, y, data), **self._redim) def violin(self, 
x=None, y=None, data=None): self._error_if_unavailable('violin') @@ -2276,7 +2295,7 @@ def violin(self, x=None, y=None, data=None): from holoviews.element import Violin except ImportError: raise ImportError('Violin plot requires HoloViews version >=1.10') - return self._stats_plot(Violin, y, data).redim(**self._redim) + return redim_(self._stats_plot(Violin, y, data), **self._redim) def hist(self, x=None, y=None, data=None): self._error_if_unavailable('hist') @@ -2330,7 +2349,7 @@ def hist(self, x=None, y=None, data=None): hists = histogram(ds, dimension=y, **hist_opts) return ( - hists.redim(**self._redim) + redim_(hists, **self._redim) .opts(cur_opts, backend='bokeh') .opts(compat_opts, backend=self._backend_compat) ) @@ -2352,10 +2371,12 @@ def hist(self, x=None, y=None, data=None): hists = [] for col in y: hist = histogram(ds, dimension=col, **hist_opts) - hists.append((col, hist.relabel(**self._relabel))) + hists.append((col, relabel(hist, **self._relabel))) return ( - self._by_type(hists, self.group_label, sort=False) - .redim(**self._redim) + redim_( + self._by_type(hists, self.group_label, sort=False), + **self._redim, + ) .opts(cur_opts, backend='bokeh') .opts(compat_opts, backend=self._backend_compat) ) @@ -2407,8 +2428,7 @@ def kde(self, x=None, y=None, data=None): dists = NdOverlay({0: Area([], self.value_label, vdim)}, [self.group_label]) redim = self._merge_redim(ranges) return ( - dists.redim(**redim) - .relabel(**self._relabel) + relabel_redim(dists, self._relabel, redim) .opts(cur_opts, backend='bokeh') .opts(compat_opts, backend=self._backend_compat) ) @@ -2424,9 +2444,9 @@ def dataset(self, x=None, y=None, data=None): data = self.data if data is None else data if self.gridded: kdims = [self.x, self.y] if len(self.indexes) == 2 else None - return Dataset(data, kdims=kdims).redim(**self._redim) + return redim_(Dataset(data, kdims=kdims), **self._redim) else: - return Dataset(data, self.kwds.get('columns')).redim(**self._redim) + return 
redim_(Dataset(data, self.kwds.get('columns')), **self._redim) def heatmap(self, x=None, y=None, data=None): self._error_if_unavailable('heatmap') @@ -2448,7 +2468,7 @@ def heatmap(self, x=None, y=None, data=None): hmap = HeatMap(data, [x, y], z, **self._relabel) if 'reduce_function' in self.kwds: hmap = hmap.aggregate(function=self.kwds['reduce_function']) - return hmap.redim(**redim).apply( + return redim_(hmap, **redim).apply( self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) @@ -2472,10 +2492,8 @@ def hexbin(self, x=None, y=None, data=None): params = dict(self._relabel) if self.geo: params['crs'] = self.crs - return ( - element(data, [x, y], z or [], **params) - .redim(**redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) + return redim_(element(data, [x, y], z or [], **params), **redim).apply( + self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) def bivariate(self, x=None, y=None, data=None): @@ -2485,10 +2503,8 @@ def bivariate(self, x=None, y=None, data=None): cur_opts, compat_opts = self._get_compat_opts('Bivariate', **self.kwds) element = self._get_element('bivariate') - return ( - element(data, [x, y]) - .redim(**self._redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) + return redim_(element(data, [x, y]), **self._redim).apply( + self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) def ohlc(self, x=None, y=None, data=None): @@ -2545,10 +2561,10 @@ def ohlc(self, x=None, y=None, data=None): seg_cur_opts['xlabel'] = '' if x == 'index' else x if 'ylabel' not in seg_cur_opts: seg_cur_opts['ylabel'] = '' - segments = segments.redim(**self._redim).apply( + segments = redim_(segments, **self._redim).apply( self._set_backends_opts, cur_opts=seg_cur_opts, compat_opts=seg_compat_opts ) - rects = rects.redim(**self._redim).apply( + rects = redim_(rects, **self._redim).apply( self._set_backends_opts, cur_opts=rect_cur_opts, 
compat_opts=rect_compat_opts ) return segments * rects @@ -2557,15 +2573,15 @@ def table(self, x=None, y=None, data=None): self._error_if_unavailable('table') data = self.data if data is None else data if isinstance(data.index, (DatetimeIndex, MultiIndex)): + # To get the index displayed in the table as Bokeh doesn't show it. data = data.reset_index() cur_opts, compat_opts = self._get_compat_opts('Table') element = self._get_element('table') - return ( - element(data, self.kwds.get('columns'), []) - .redim(**self._redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) - ) + return redim_( + element(data, self.kwds.get('columns'), []), + **self._redim, + ).apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) def labels(self, x=None, y=None, data=None): self._error_if_unavailable('labels') @@ -2589,7 +2605,7 @@ def labels(self, x=None, y=None, data=None): labels = labels.layout() if self.subplots else labels.overlay(sort=False) else: labels = element(data, kdims, vdims) - return labels.redim(**self._redim).apply( + return redim_(labels, **self._redim).apply( self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) @@ -2609,8 +2625,10 @@ def _process_gridded_args(self, data, x, y, z): if isinstance(data, xr.DataArray): data = data.to_dataset(name=data.name or 'value') if is_tabular(data): - if self.use_index and any( - c for c in self.hover_cols if c in self.indexes and c not in data.columns + if ( + not support_index(data) + and self.use_index + and any(c for c in self.hover_cols if c in self.indexes and c not in data.columns) ): data = data.reset_index() # calculate any derived time @@ -2620,7 +2638,8 @@ def _process_gridded_args(self, data, x, y, z): dimensions.extend(dimension if isinstance(dimension, list) else [dimension]) not_found = [dim for dim in dimensions if dim not in self.variables] - _, data = process_derived_datetime_pandas(data, not_found, self.indexes) + post_not_found, data = 
process_derived_datetime_pandas(data, not_found, self.indexes) + self.variables.extend([item for item in not_found if item not in post_not_found]) return data, x, y, z @@ -2648,10 +2667,8 @@ def image(self, x=None, y=None, z=None, data=None): element = self._get_element('image') if self.geo: params['crs'] = self.crs - return ( - element(data, [x, y], z, **params) - .redim(**redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) + return redim_(element(data, [x, y], z, **params), **redim).apply( + self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) def rgb(self, x=None, y=None, z=None, data=None): @@ -2687,7 +2704,7 @@ def rgb(self, x=None, y=None, z=None, data=None): if self.geo: params['crs'] = self.crs rgb = element(eldata, [x, y], element.vdims[:nbands], **params) - return rgb.redim(**self._redim).apply( + return redim_(rgb, **self._redim).apply( self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts ) @@ -2708,11 +2725,10 @@ def quadmesh(self, x=None, y=None, z=None, data=None): cur_opts, compat_opts = self._get_compat_opts('QuadMesh') if self.geo: params['crs'] = self.crs - return ( - element(data, [x, y], z, **params) - .redim(**redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) - ) + return redim_( + element(data, [x, y], z, **params), + **redim, + ).apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) def contour(self, x=None, y=None, z=None, data=None, filled=False): self._error_if_unavailable('contour') @@ -2761,9 +2777,9 @@ def contourf(self, x=None, y=None, z=None, data=None): if self._dim_ranges['c'] != (None, None): z_name = contourf.vdims[0].name redim = {z_name: self._dim_ranges['c']} + return contourf.redim.range(**redim) else: - redim = {} - return contourf.redim.range(**redim) + return contourf def vectorfield(self, x=None, y=None, angle=None, mag=None, data=None): self._error_if_unavailable('vectorfield') @@ -2782,11 +2798,10 @@ 
def vectorfield(self, x=None, y=None, angle=None, mag=None, data=None): cur_opts, compat_opts = self._get_compat_opts('VectorField') if self.geo: params['crs'] = self.crs - return ( - element(data, [x, y], z, **params) - .redim(**redim) - .apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) - ) + return redim_( + element(data, [x, y], z, **params), + **redim, + ).apply(self._set_backends_opts, cur_opts=cur_opts, compat_opts=compat_opts) ########################## # Geometry plots # @@ -2832,7 +2847,7 @@ def _geom_plot(self, x=None, y=None, data=None, kind='polygons'): obj = element(data, kdims, vdims, **params) return ( - obj.redim(**redim) + redim_(obj, **redim) .opts({element.name: cur_opts}, backend='bokeh') .opts({element.name: compat_opts}, backend=self._backend_compat) ) diff --git a/hvplot/tests/testcharts.py b/hvplot/tests/testcharts.py index b37c4cf51..3c8de7215 100644 --- a/hvplot/tests/testcharts.py +++ b/hvplot/tests/testcharts.py @@ -3,6 +3,7 @@ import numpy as np import pandas as pd +import pytest from holoviews.core.dimension import Dimension from holoviews import NdLayout, NdOverlay, Store, dim, render @@ -50,18 +51,12 @@ def test_2d_set_hover_cols_to_list(self, kind, element): @parameterized.expand([('points', Points), ('paths', Path)]) def test_2d_set_hover_cols_including_index(self, kind, element): plot = self.cat_df.hvplot(x='x', y='y', hover_cols=['index'], kind=kind) - data = plot.data[0] if kind == 'paths' else plot.data - assert 'index' in data.columns - self.assertEqual(plot, element(self.cat_df.reset_index(), ['x', 'y'], ['index'])) + self.assertEqual(plot, element(self.cat_df, ['x', 'y'], ['index'])) @parameterized.expand([('points', Points), ('paths', Path)]) def test_2d_set_hover_cols_to_all(self, kind, element): plot = self.cat_df.hvplot(x='x', y='y', hover_cols='all', kind=kind) - data = plot.data[0] if kind == 'paths' else plot.data - assert 'index' in data.columns - self.assertEqual( - plot, 
element(self.cat_df.reset_index(), ['x', 'y'], ['index', 'category']) - ) + self.assertEqual(plot, element(self.cat_df, ['x', 'y'], ['index', 'category'])) @parameterized.expand([('points', Points), ('paths', Path)]) def test_2d_set_hover_cols_to_all_with_use_index_as_false(self, kind, element): @@ -111,6 +106,22 @@ def setUp(self): def test_heatmap_2d_index_columns(self): self.df.hvplot.heatmap() + @parameterized.expand([('points', Points), ('paths', Path)]) + def test_2d_set_hover_cols_including_index(self, kind, element): + plot = self.cat_df.hvplot(x='x', y='y', hover_cols=['index'], kind=kind) + data = plot.data[0] if kind == 'paths' else plot.data + assert 'index' in data.columns + self.assertEqual(plot, element(self.cat_df.reset_index(), ['x', 'y'], ['index'])) + + @parameterized.expand([('points', Points), ('paths', Path)]) + def test_2d_set_hover_cols_to_all(self, kind, element): + plot = self.cat_df.hvplot(x='x', y='y', hover_cols='all', kind=kind) + data = plot.data[0] if kind == 'paths' else plot.data + assert 'index' in data.columns + self.assertEqual( + plot, element(self.cat_df.reset_index(), ['x', 'y'], ['index', 'category']) + ) + class TestChart1D(ComparisonTestCase): def setUp(self): @@ -124,9 +135,15 @@ def setUp(self): self.cat_df = pd.DataFrame( [[1, 2, 'A'], [3, 4, 'B'], [5, 6, 'C']], columns=['x', 'y', 'category'] ) + self.cat_df_index = self.cat_df.set_index('category') + self.cat_df_index_y = self.cat_df.set_index('y') self.cat_only_df = pd.DataFrame( [['A', 'a'], ['B', 'b'], ['C', 'c']], columns=['upper', 'lower'] ) + multii_df = pd.DataFrame( + {'A': [1, 2, 3, 4], 'B': ['a', 'a', 'b', 'b'], 'C': [0, 1, 2, 1.5]} + ) + self.multii_df = multii_df.set_index(['A', 'B']) self.time_df = pd.DataFrame( { 'time': pd.date_range('1/1/2000', periods=10, tz='UTC'), @@ -147,8 +164,8 @@ def test_wide_chart(self, kind, element): plot = self.df.hvplot(kind=kind) obj = NdOverlay( { - 'x': element(self.df, 'index', 'x').redim(x='value'), - 'y': 
element(self.df, 'index', 'y').redim(y='value'), + 'x': element(self.df, 'index', Dimension('x', label='value')), + 'y': element(self.df, 'index', Dimension('y', label='value')), }, 'Variable', ) @@ -170,8 +187,8 @@ def test_wide_chart_labels(self, kind, element): plot = self.df.hvplot(kind=kind, value_label='Test', group_label='Category') obj = NdOverlay( { - 'x': element(self.df, 'index', 'x').redim(x='Test'), - 'y': element(self.df, 'index', 'y').redim(y='Test'), + 'x': element(self.df, 'index', Dimension('x', label='Test')), + 'y': element(self.df, 'index', Dimension('y', label='Test')), }, 'Category', ) @@ -263,8 +280,8 @@ def test_area_stacked(self): plot = self.df.hvplot.area(stacked=True) obj = NdOverlay( { - 'x': Area(self.df, 'index', 'x').redim(x='value'), - 'y': Area(self.df, 'index', 'y').redim(y='value'), + 'x': Area(self.df, 'index', Dimension('x', label='value')), + 'y': Area(self.df, 'index', Dimension('y', label='value')), }, 'Variable', ) @@ -316,8 +333,8 @@ def test_only_includes_num_chart(self, kind, element): plot = self.cat_df.hvplot(kind=kind) obj = NdOverlay( { - 'x': element(self.cat_df, 'index', 'x').redim(x='value'), - 'y': element(self.cat_df, 'index', 'y').redim(y='value'), + 'x': element(self.cat_df, 'index', Dimension('x', label='value')), + 'y': element(self.cat_df, 'index', Dimension('y', label='value')), }, 'Variable', ) @@ -328,10 +345,11 @@ def test_includes_str_if_no_num_chart(self, kind, element): plot = self.cat_only_df.hvplot(kind=kind) obj = NdOverlay( { - 'upper': element(self.cat_only_df, 'index', 'upper').redim(upper='value'), - 'lower': element(self.cat_only_df, 'index', 'lower').redim(lower='value'), + 'upper': element(self.cat_only_df, 'index', Dimension('upper', label='value')), + 'lower': element(self.cat_only_df, 'index', Dimension('lower', label='value')), }, 'Variable', + sort=False, ) self.assertEqual(plot, obj) @@ -477,6 +495,48 @@ def test_labels_by_subplots(self): ) assert isinstance(plot, NdLayout) + def 
test_groupby_from_index(self): + hmap = self.cat_df_index.hvplot.scatter(x='x', y='y', groupby='category', dynamic=False) + assert hmap.kdims == ['category'] + assert hmap.vdims == [] + assert list(hmap.keys()) == ['A', 'B', 'C'] + assert hmap.last.kdims == ['x'] + assert hmap.last.vdims == ['y'] + + def test_multi_index_groupby_from_index(self): + hmap = self.multii_df.hvplot.scatter(x='A', y='C', groupby='B', dynamic=False) + assert hmap.kdims == ['B'] + assert hmap.vdims == [] + assert list(hmap.keys()) == ['a', 'b'] + assert hmap.last.kdims == ['A'] + assert hmap.last.vdims == ['C'] + + @pytest.mark.xfail(reason='See https://github.com/holoviz/hvplot/issues/1364') + def test_hierarchical_columns_auto_stack(self): + arrays = [ + ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'], + ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'], + ] + tuples = list(zip(*arrays)) + index = pd.MultiIndex.from_tuples(tuples) + df = pd.DataFrame(np.random.randn(3, 8), index=['A', 'B', 'C'], columns=index) + df.hvplot.scatter() + + def test_bar_y_from_index_with_by(self): + # Testing a somewhat silly plot but that seemed to be supported + # https://github.com/holoviz/hvplot/blob/6c96c7e9abcd44380d2122e3d86827dedab32dea/hvplot/converter.py#L1996-L1999 + plot = self.cat_df_index_y.hvplot.bar(x='x', y='y', by='category') + assert plot.kdims == ['x', 'category'] + assert plot.vdims == ['y'] + + def test_table_datetime_index_displayed(self): + table = self.dt_df.hvplot.table() + assert table.kdims[0] == 'index' + + def test_table_multi_index_displayed(self): + table = self.multii_df.hvplot.table() + assert table.kdims[:2] == self.multii_df.index.names + class TestChart1DDask(TestChart1D): def setUp(self): @@ -490,7 +550,18 @@ def setUp(self): self.df = dd.from_pandas(self.df, npartitions=2) self.dt_df = dd.from_pandas(self.dt_df, npartitions=3) self.cat_df = dd.from_pandas(self.cat_df, npartitions=3) + self.cat_df_index = dd.from_pandas(self.cat_df_index, npartitions=3) + 
self.cat_df_index_y = dd.from_pandas(self.cat_df_index_y, npartitions=3) self.cat_only_df = dd.from_pandas(self.cat_only_df, npartitions=1) def test_by_datetime_accessor(self): raise SkipTest("Can't expand dt accessor columns when using dask") + + def test_multi_index_groupby_from_index(self): + raise SkipTest('Dask does not support MultiIndex Dataframes.') + + def test_table_datetime_index_displayed(self): + raise SkipTest('Only supported for Pandas DatetimeIndex.') + + def test_table_multi_index_displayed(self): + raise SkipTest('Dask does not support MultiIndex Dataframes.') diff --git a/hvplot/tests/teststatplots.py b/hvplot/tests/teststatplots.py new file mode 100644 index 000000000..f83196752 --- /dev/null +++ b/hvplot/tests/teststatplots.py @@ -0,0 +1,24 @@ +import numpy as np +import pandas as pd +import xarray as xr + +from holoviews import Store + +import hvplot.xarray # noqa: F401 + + +def test_violin_from_xarray_with_by_and_color(): + latitudes = np.linspace(-90, 90, 180) + longitudes = np.linspace(-180, 180, 360) + times = pd.date_range('2023-01-01', periods=365, freq='D') + data = np.random.random((365, 180, 360)) + da = xr.DataArray( + data, + coords={'time': times, 'lat': latitudes, 'lon': longitudes}, + dims=['time', 'lat', 'lon'], + name='temperature', + ) + plot = da.hvplot.violin(y='temperature', by='lat', color='lat') + assert plot.kdims == ['lat'] + opts = Store.lookup_options('bokeh', plot, 'style') + assert opts.kwargs['violin_fill_color'] == 'lat' diff --git a/hvplot/util.py b/hvplot/util.py index 0c498529d..75f1df016 100644 --- a/hvplot/util.py +++ b/hvplot/util.py @@ -446,6 +446,16 @@ def is_xarray_dataarray(data): return isinstance(data, DataArray) +def support_index(data): + """ + HoloViews added in v1.19.0 support for retaining Pandas indexes (no longer + calling .reset_index()). + + Update this utility when other data interfaces support that (geopandas, dask, etc.) 
+ """ + return type(data) is pd.DataFrame + + def process_intake(data, use_dask): if data.container not in ('dataframe', 'xarray'): raise NotImplementedError( @@ -530,7 +540,7 @@ def process_xarray( data = data.persist() if persist else data else: data = dataset.to_dataframe() - if len(data.index.names) > 1: + if not support_index(data) and len(data.index.names) > 1: data = data.reset_index() if len(dims) == 0: dims = ['index'] @@ -694,3 +704,26 @@ def import_datashader(): 'datashading features. Install it with pip or conda.' ) from None return datashader + + +def relabel(hv_obj, **kwargs): + """Conditionally relabel a HoloViews object""" + if kwargs: + hv_obj = hv_obj.relabel(**kwargs) + return hv_obj + + +def redim_(hv_obj, **kwargs): + """Conditionally redim a HoloViews object""" + if kwargs: + hv_obj = hv_obj.redim(**kwargs) + return hv_obj + + +def relabel_redim(hv_obj, relabel_kwargs, redim_kwargs): + """Conditionally relabel and/or redim a HoloViews object""" + if relabel_kwargs: + hv_obj = hv_obj.relabel(**relabel_kwargs) + if redim_kwargs: + hv_obj = hv_obj.redim(**redim_kwargs) + return hv_obj