Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow Bars to be plotted on continuous axes #6145

Merged
merged 26 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 36 additions & 2 deletions examples/reference/elements/bokeh/Bars.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"A ``Bars`` element can be sliced and selecting on like any other element:"
"A `Bars` element can be sliced and selected on like any other element:"
]
},
{
Expand Down Expand Up @@ -88,7 +88,41 @@
"\n",
"# or using .redim.values(**{'Car Occupants': ['three', 'two', 'four', 'one', 'five', 'six']})\n",
"\n",
"hv.Bars(data, occupants, 'Count') "
"hv.Bars(data, occupants, 'Count')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`Bars` also supports continuous data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": [0, 1, 5], \"y\": [0, 2, 10]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And datetime data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": pd.date_range(\"2017-01-01\", \"2017-01-03\"), \"y\": [0, 2, -1]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
Expand Down
37 changes: 36 additions & 1 deletion examples/reference/elements/matplotlib/Bars.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import holoviews as hv\n",
"hv.extension('matplotlib')"
Expand Down Expand Up @@ -80,6 +81,40 @@
"hv.Bars(data, occupants, 'Count') "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`Bars` also supports continuous data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": [0, 1, 5], \"y\": [0, 2, 10]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And datetime data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": pd.date_range(\"2017-01-01\", \"2017-01-03\"), \"y\": [0, 2, -1]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -169,5 +204,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}
35 changes: 35 additions & 0 deletions examples/reference/elements/plotly/Bars.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import holoviews as hv\n",
"hv.extension('plotly')"
Expand Down Expand Up @@ -80,6 +81,40 @@
"hv.Bars(data, occupants, 'Count')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`Bars` also supports continuous data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": [0, 1, 5], \"y\": [0, 2, 10]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"And datetime data and x-axis."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"data = pd.DataFrame({\"x\": pd.date_range(\"2017-01-01\", \"2017-01-03\"), \"y\": [0, 2, -1]})\n",
"hv.Bars(data, [\"x\"], [\"y\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down
36 changes: 25 additions & 11 deletions holoviews/plotting/bokeh/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

import numpy as np
import param
from bokeh.models import CategoricalColorMapper, CustomJS, FactorRange, Range1d, Whisker
from bokeh.models import CategoricalColorMapper, CustomJS, Whisker
from bokeh.models.tools import BoxSelectTool
from bokeh.transform import jitter

from ...core.data import Dataset
from ...core.dimension import dimension_name
from ...core.util import dimension_sanitizer, isfinite
from ...core.util import dimension_sanitizer, isdatetime, isfinite
from ...operation import interpolate_curve
from ...util.transform import dim
from ..mixins import AreaMixin, BarsMixin, SpikesMixin
Expand Down Expand Up @@ -780,10 +780,6 @@ class BarPlot(BarsMixin, ColorbarPlot, LegendPlot):
_nonvectorized_styles = base_properties + ['bar_width', 'cmap']
_plot_methods = dict(single=('vbar', 'hbar'))

# Declare that y-range should auto-range if not bounded
_x_range_type = FactorRange
_y_range_type = Range1d

def _axis_properties(self, axis, key, plot, dimension=None,
ax_mapping=None):
if ax_mapping is None:
Expand Down Expand Up @@ -862,10 +858,10 @@ def _add_color_data(self, ds, ranges, style, cdim, data, mapping, factors, color

# Merge data and mappings
mapping.update(cmapping)
for k, cd in cdata.items():
for i, (k, cd) in enumerate(cdata.items()):
if isinstance(cmapper, CategoricalColorMapper) and cd.dtype.kind in 'uif':
cd = categorize_array(cd, cdim)
if k not in data or len(data[k]) != next(len(data[key]) for key in data if key != k):
if k not in data or (len(data[k]) != next(len(data[key]) for key in data if key != k) and not i == len(cdata) - 1):
data[k].append(cd)
else:
data[k][-1] = cd
Expand All @@ -889,6 +885,7 @@ def get_data(self, element, ranges, style):
grouping = 'grouped'
group_dim = element.get_dimension(1)

data = defaultdict(list)
xdim = element.get_dimension(0)
ydim = element.vdims[0]
no_cidx = self.color_index is None
Expand All @@ -906,18 +903,36 @@ def get_data(self, element, ranges, style):
hover = 'hover' in self.handles

# Group by stack or group dim if necessary
xdiff = None
xvals = element.dimension_values(xdim)
if group_dim is None:
grouped = {0: element}
is_dt = isdatetime(xvals)
if is_dt or xvals.dtype.kind != 'O':
ahuang11 marked this conversation as resolved.
Show resolved Hide resolved
xdiff = np.diff(xvals)
if len(np.unique(xdiff)) == 1 and xdiff[0] == 0:
xdiff = 1
if is_dt:
width = xdiff.astype('timedelta64[ns]').astype(np.int64) * width / 1e6
else:
width = width / xdiff
width = 1 - np.repeat(np.min(np.abs(width)), len(xvals))
ahuang11 marked this conversation as resolved.
Show resolved Hide resolved
data['width'] = [width]
else:
data['width'] = [np.repeat(width, len(xvals))]
width = 'width'
else:
grouped = element.groupby(group_dim, group_type=Dataset,
container_type=dict,
datatype=['dataframe', 'dictionary'])
data["width"] = [np.repeat(width, len(xvals))]
ahuang11 marked this conversation as resolved.
Show resolved Hide resolved

y0, y1 = ranges.get(ydim.name, {'combined': (None, None)})['combined']
if self.logy:
bottom = (ydim.range[0] or (0.01 if y1 > 0.01 else 10**(np.log10(y1)-2)))
else:
bottom = 0

# Map attributes to data
if grouping == 'stacked':
mapping = {'x': xdim.name, 'top': 'top',
Expand Down Expand Up @@ -956,7 +971,6 @@ def get_data(self, element, ranges, style):
factors, colors = None, None

# Iterate over stacks and groups and accumulate data
data = defaultdict(list)
baselines = defaultdict(lambda: {'positive': bottom, 'negative': 0})
for k, ds in grouped.items():
k = k[0] if isinstance(k, tuple) else k
Expand Down Expand Up @@ -995,7 +1009,7 @@ def get_data(self, element, ranges, style):
ds = ds.add_dimension(group_dim, ds.ndims, gval)
data[group_dim.name].append(ds.dimension_values(group_dim))
else:
data[xdim.name].append(ds.dimension_values(xdim))
data[xdim.name].append(xvals)
data[ydim.name].append(ds.dimension_values(ydim))

if hover and grouping != 'stacked':
Expand Down Expand Up @@ -1027,7 +1041,7 @@ def get_data(self, element, ranges, style):

# Ensure x-values are categorical
xname = dimension_sanitizer(xdim.name)
if xname in sanitized_data:
if xname in sanitized_data and isinstance(sanitized_data[xname], np.ndarray) and sanitized_data[xname].dtype.kind not in 'uifM' and not isdatetime(sanitized_data[xname]):
sanitized_data[xname] = categorize_array(sanitized_data[xname], xdim)

# If axes inverted change mapping to match hbar signature
Expand Down
8 changes: 5 additions & 3 deletions holoviews/plotting/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def get_extents(self, element, ranges, range_type='combined', **kwargs):
s0 = min(s0, 0) if util.isfinite(s0) else 0
s1 = max(s1, 0) if util.isfinite(s1) else 0
ranges[vdim]['soft'] = (s0, s1)
l, b, r, t = super().get_extents(element, ranges, range_type, ydim=element.vdims[0])
if range_type not in ('combined', 'data'):
return super().get_extents(element, ranges, range_type, ydim=element.vdims[0])
return l, b, r, t

# Compute stack heights
xdim = element.kdims[0]
Expand All @@ -173,14 +174,15 @@ def get_extents(self, element, ranges, range_type='combined', **kwargs):
else:
y0, y1 = ranges[vdim]['combined']

x0, x1 = (l, r) if util.isnumeric(l) and len(element.kdims) == 1 else ('', '')
if range_type == 'data':
return ('', y0, '', y1)
return (x0, y0, x1, y1)

padding = 0 if self.overlaid else self.padding
_, ypad, _ = get_axis_padding(padding)
y0, y1 = util.dimension_range(y0, y1, ranges[vdim]['hard'], ranges[vdim]['soft'], ypad, self.logy)
y0, y1 = util.dimension_range(y0, y1, self.ylim, (None, None))
return ('', y0, '', y1)
return (x0, y0, x1, y1)

def _get_coords(self, element, ranges, as_string=True):
"""
Expand Down
57 changes: 48 additions & 9 deletions holoviews/plotting/mpl/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,8 @@ def _finalize_ticks(self, axis, element, xticks, yticks, zticks):
def _create_bars(self, axis, element, ranges, style):
# Get values dimensions, and style information
(gdim, cdim, sdim), values = self._get_values(element, ranges)

cats = None
style_dim = None
if sdim:
cats = values['stack']
Expand All @@ -941,7 +943,23 @@ def _create_bars(self, axis, element, ranges, style):
style_map = {None: {}}

# Compute widths
width = (1-(2.*self.bar_padding)) / len(values.get('category', [None]))
xvals = element.dimension_values(0)
is_dt = isdatetime(xvals)
continuous = True
if is_dt or xvals.dtype.kind != 'O' and not (cdim or len(element.kdims) > 1):
xdiff_vals = date2num(xvals) if is_dt else xvals
xdiff = np.abs(np.diff(xdiff_vals))
if len(np.unique(xdiff)) == 1:
# if all are same
xdiff = 1
else:
xdiff = np.min(xdiff)
width = (1 - self.bar_padding) * xdiff
else:
xdiff = len(values.get('category', [None]))
width = (1 - self.bar_padding) / xdiff
continuous = False

if self.invert_axes:
plot_fn = 'barh'
x, y, w, bottom = 'y', 'width', 'height', 'left'
Expand All @@ -952,21 +970,30 @@ def _create_bars(self, axis, element, ranges, style):
# Iterate over group, category and stack dimension values
# computing xticks and drawing bars and applying styles
xticks, labels, bar_data = [], [], {}
categories = values.get('category', [None])
num_categories = len(categories)
for gidx, grp in enumerate(values.get('group', [None])):
sel_key = {}
label = None
if grp is not None:
grp_label = gdim.pprint_value(grp)
sel_key[gdim.name] = [grp]
yalign = -0.04 if cdim and self.multi_level else 0
xticks.append((gidx+0.5, grp_label, yalign))
for cidx, cat in enumerate(values.get('category', [None])):
xpos = gidx+self.bar_padding+(cidx*width)
goffset = width * (num_categories / 2 - 0.5)
if num_categories > 1:
# tiny offset so the group tick does not land exactly on a category tick (coinciding ticks would be merged into one label)
goffset += 0.000001
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This kind of magic number seems hacky. Are you sure there is not a better way?

Copy link
Collaborator

@ahuang11 ahuang11 May 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK no (notice all the NA's)

Without:

import numpy as np
import pandas as pd
import holoviews as hv
hv.extension("matplotlib")

# Minimal reproduction: grouped categorical Bars (Pets x Gender) rendered with
# the matplotlib backend.
np.random.seed(100)  # fixed seed so the sampled categories are reproducible
samples = 100

pets = ['Cat', 'Dog', 'Hamster', 'Rabbit']
genders = ['Female', 'Male', "NA"]

pets_sample = np.random.choice(pets, samples)
gender_sample = np.random.choice(genders, samples)

# Summing a column of ones per (pet, gender) pair yields the pair counts.
bars = hv.Bars((pets_sample, gender_sample, np.ones(samples)), ['Pets', 'Gender']).aggregate(function=np.sum)

bars.opts(fig_size=300, aspect=2)
image

With the "hack"
image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is what is happening behind the scenes (haven't added a small eps).

import matplotlib.pyplot as plt
import matplotlib as mpl

# HoloViews: per-category keyword arguments that are handed to matplotlib's
# `bar` for the grouped example (no small offset applied to the group ticks).
bar_data = {
    "Female": {
        "x": [0.0, 1.0, 2.0, 3.0],
        "height": [6.0, 10.0, 10.0, 7.0],
        "width": 0.26666666666666666,
        "bottom": [0, 0, 0, 0],
        "label": "Female",
        "zorder": 0,
        "edgecolor": "k",
        "color": "#30a2da",
    },
    "NA": {
        "x": [0.26666666666666666, 1.2666666666666666, 2.2666666666666666, 3.2666666666666666],
        "height": [10.0, 10.0, 10.0, 9.0],
        "width": 0.26666666666666666,
        "bottom": [0, 0, 0, 0],
        "label": "NA",
        "zorder": 0,
        "edgecolor": "k",
        "color": "#fc4f30",
    },
    "Male": {
        "x": [0.5333333333333333, 1.5333333333333332, 2.533333333333333, 3.533333333333333],
        "height": [8.0, 7.0, 6.0, 7.0],
        "width": 0.26666666666666666,
        "bottom": [0, 0, 0, 0],
        "label": "Male",
        "zorder": 0,
        "edgecolor": "k",
        "color": "#e5ae38",
    },
}
# (tick position, tick label, vertical label offset) triples. Each group label
# ("Cat", "Rabbit", "Hamster", "Dog") has exactly the same position as the
# "NA" category tick of its group.
xaxis_kwargs = [
    (0.26666666666666666, "Cat", -0.04),
    (0.0, "Female", 0),
    (0.26666666666666666, "NA", 0),
    (0.5333333333333333, "Male", 0),
    (1.2666666666666666, "Rabbit", -0.04),
    (1.0, "Female", 0),
    (1.2666666666666666, "NA", 0),
    (1.5333333333333332, "Male", 0),
    (2.2666666666666666, "Hamster", -0.04),
    (2.0, "Female", 0),
    (2.2666666666666666, "NA", 0),
    (2.533333333333333, "Male", 0),
    (3.2666666666666666, "Dog", -0.04),
    (3.0, "Female", 0),
    (3.2666666666666666, "NA", 0),
    (3.533333333333333, "Male", 0),
]


fig, axis = plt.subplots()
# One `bar` call per category, using the specs above as keyword arguments.
bars = [getattr(axis, "bar")(**bar_spec) for bar_spec in bar_data.values()]
# Sort the tick triples by position, then split them into parallel tuples.
ticks, labels, alignments = zip(*sorted(xaxis_kwargs, key=lambda x: x[0]))
axis.set_xticks(ticks)
axis.set_xticklabels(labels)
# Apply the per-tick vertical offset to each tick label.
for t, y in zip(axis.get_xticklabels(), alignments):
    t.set_y(y)

I would have thought I could use t.set_text(label) in the for-loop. But it does not seem to work with Matplotlib, see here:

import matplotlib.pyplot as plt

fig, axis = plt.subplots()
# Attempt to relabel the ticks by mutating the tick-label Text objects directly.
# NOTE(review): reported in this thread not to take effect in Matplotlib — confirm.
for t in axis.get_xticklabels():
    t.set_text('a')

So the small diff is OK. Maybe make it relative to the width: goffset += width / 1000

ahuang11 marked this conversation as resolved.
Show resolved Hide resolved

xpos = gidx+goffset if not continuous else xvals[gidx]
if not continuous:
xticks.append(((xpos), grp_label, yalign))
for cidx, cat in enumerate(categories):
xpos = gidx+(cidx*width) if not continuous else xvals[gidx]
if cat is not None:
label = cdim.pprint_value(cat)
sel_key[cdim.name] = [cat]
if self.multi_level:
xticks.append((xpos+width/2., label, 0))
if self.multi_level and not continuous:
xticks.append((xpos, label, 0))
prev = 0
for stk in values.get('stack', [None]):
if stk is not None:
Expand All @@ -975,7 +1002,8 @@ def _create_bars(self, axis, element, ranges, style):
el = element.select(**sel_key)
vals = el.dimension_values(element.vdims[0].name)
val = float(vals[0]) if len(vals) else np.nan
xval = xpos+width/2.
xval = xpos

if label in bar_data:
group = bar_data[label]
group[x].append(xval)
Expand Down Expand Up @@ -1014,8 +1042,19 @@ def _create_bars(self, axis, element, ranges, style):
legend_opts.update(**leg_spec)
axis.legend(title=title, **legend_opts)

return bars, xticks, ax_dims

x_range = ranges[gdim.name]["data"]
if continuous and not is_dt:
if style.get('align', 'center') == 'center':
left_multiplier = 0.5
right_multiplier = 0.5
else:
left_multiplier = 0
right_multiplier = 1
ranges[gdim.name]["data"] = (
x_range[0] - width * left_multiplier,
x_range[1] + width * right_multiplier
)
return bars, xticks if not continuous else None, ax_dims


class SpikesPlot(SpikesMixin, PathPlot, ColorbarPlot):
Expand Down
Loading