Skip to content

Commit

Permalink
Histogram improvements (#1836)
Browse files Browse the repository at this point in the history
  • Loading branch information
philippjfr authored Oct 31, 2017
1 parent 77b92f0 commit 2782cfd
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 30 deletions.
62 changes: 39 additions & 23 deletions holoviews/operation/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Collection of either extremely generic or simple Operation
examples.
"""
from __future__ import division

import numpy as np

Expand All @@ -11,7 +12,8 @@
from ..core import (Operation, NdOverlay, Overlay, GridMatrix,
HoloMap, Dataset, Element, Collator, Dimension)
from ..core.data import ArrayInterface, DictInterface
from ..core.util import find_minmax, group_sanitizer, label_sanitizer, pd, basestring
from ..core.util import (find_minmax, group_sanitizer, label_sanitizer, pd,
basestring, datetime_types)
from ..element.chart import Histogram, Scatter
from ..element.raster import Raster, Image, RGB, QuadMesh
from ..element.path import Contours, Polygons
Expand Down Expand Up @@ -482,6 +484,9 @@ class histogram(Operation):
dimension = param.String(default=None, doc="""
Along which dimension of the Element to compute the histogram.""")

frequency_label = param.String(default='{dim} Frequency', doc="""
Format string defining the label of the frequency dimension of the Histogram.""")

groupby = param.ClassSelector(default=None, class_=(basestring, Dimension), doc="""
Defines a dimension to group the Histogram returning an NdOverlay of Histograms.""")

Expand All @@ -494,8 +499,14 @@ class histogram(Operation):
mean_weighted = param.Boolean(default=False, doc="""
Whether the weighted frequencies are averaged.""")

normed = param.Boolean(default=True, doc="""
Whether the histogram frequencies are normalized.""")
normed = param.ObjectSelector(default=True,
objects=[True, False, 'integral', 'height'],
doc="""
Controls normalization behavior. If `True` or `'integral'`, then
`density=True` is passed to np.histogram, and the distribution
is normalized such that the integral is unity. If `False`,
then the frequencies will be raw counts. If `'height'`, then the
frequencies are normalized such that the max bin height is unity.""")

nonzero = param.Boolean(default=False, doc="""
Whether to use only nonzero values when computing the histogram""")
Expand Down Expand Up @@ -531,16 +542,12 @@ def _process(self, view, key=None):
weights = weights[mask]
else:
weights = None
try:
hist_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\
if self.p.bin_range is None else self.p.bin_range
except ValueError:
hist_range = (0, 1)

data = data[np.isfinite(data)]
hist_range = self.p.bin_range or view.range(selected_dim)
# Avoids range issues including zero bin range and empty bins
if hist_range == (0, 0):
if hist_range == (0, 0) or any(not np.isfinite(r) for r in hist_range):
hist_range = (0, 1)
data = data[np.invert(np.isnan(data))]
if self.p.log:
bin_min = max([abs(hist_range[0]), data[data>0].min()])
edges = np.logspace(np.log10(bin_min), np.log10(hist_range[1]),
Expand All @@ -549,29 +556,39 @@ def _process(self, view, key=None):
edges = np.linspace(hist_range[0], hist_range[1], self.p.num_bins + 1)
normed = False if self.p.mean_weighted and self.p.weight_dimension else self.p.normed

data = data[np.isfinite(data)]
if len(data):
hist, edges = np.histogram(data, normed=normed, range=hist_range,
weights=weights, bins=edges)
if not normed and self.p.weight_dimension and self.p.mean_weighted:
hist_mean, _ = np.histogram(data, normed=normed,
range=hist_range, bins=self.p.num_bins)
hist /= hist_mean
if normed:
# This covers True, 'height', 'integral'
hist, edges = np.histogram(data, density=True, range=hist_range,
weights=weights, bins=edges)
if normed=='height':
hist /= hist.max()
else:
hist, edges = np.histogram(data, normed=normed, range=hist_range,
weights=weights, bins=edges)
if self.p.weight_dimension and self.p.mean_weighted:
hist_mean, _ = np.histogram(data, density=False, range=hist_range,
bins=self.p.num_bins)
hist /= hist_mean
else:
hist = np.zeros(self.p.num_bins)
hist[np.isnan(hist)] = 0

params = {}
if self.p.weight_dimension:
params['vdims'] = [view.get_dimension(self.p.weight_dimension)]
else:
label = self.p.frequency_label.format(dim=selected_dim)
params['vdims'] = [Dimension('{}_frequency'.format(selected_dim),
label=label)]

if view.group != view.__class__.__name__:
params['group'] = view.group

return Histogram((hist, edges), kdims=[view.get_dimension(selected_dim)],
label=view.label, **params)



class decimate(Operation):
"""
Decimates any column based Element to a specified number of random
Expand Down Expand Up @@ -765,8 +782,10 @@ def _process(self, p, element, ranges={}):
el_data = element.data

# Get dimensions to plot against each other
types = (str, basestring, np.str_, np.object_)+datetime_types
dims = [d for d in element.dimensions()
if _is_number(element.range(d)[0])]
if _is_number(element.range(d)[0]) and
not issubclass(element.get_dimension_type(d), types)]
permuted_dims = [(d1, d2) for d1 in dims
for d2 in dims[::-1]]

Expand All @@ -787,10 +806,7 @@ def _process(self, p, element, ranges={}):
el = p.diagonal_type(values, kdims=[d1])
elif p.diagonal_operation is histogram or isinstance(p.diagonal_operation, histogram):
bin_range = ranges.get(d1.name, element.range(d1))
opts = dict(axiswise=True, framewise=True)
el = p.diagonal_operation(element,
dimension=d1.name,
bin_range=bin_range).opts(norm=opts)
el = p.diagonal_operation(element, dimension=d1.name, bin_range=bin_range)
else:
el = p.diagonal_operation(element, dimension=d1.name)
else:
Expand Down
3 changes: 2 additions & 1 deletion holoviews/plotting/bokeh/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,8 @@ def get_data(self, element, ranges, style):
data = dict(top=element.values, left=element.edges[:-1],
right=element.edges[1:])

color_dims = self.adjoined.traverse(lambda x: x.handles.get('color_dim'))
color_dims = [d for d in self.adjoined.traverse(lambda x: x.handles.get('color_dim'))
if d is not None]
dim = color_dims[0] if color_dims else None
cmapper = self._get_colormapper(dim, element, {}, {})
if cmapper and dim in element.dimensions():
Expand Down
12 changes: 6 additions & 6 deletions holoviews/plotting/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,19 +254,19 @@ def get_sideplot_ranges(plot, element, main, ranges):
dictionary of ranges.
"""
key = plot.current_key
dims = element.dimensions(label=True)
dim = dims[1] if dims[1] != 'Frequency' else dims[0]
dims = element.dimensions()
dim = dims[0] if 'frequency' in dims[1].name else dims[1]
range_item = main
if isinstance(main, HoloMap):
if issubclass(main.type, CompositeOverlay):
range_item = [hm for hm in main.split_overlays()[1]
if dim in hm.dimensions('all', label=True)][0]
if dim in hm.dimensions('all')][0]
else:
range_item = HoloMap({0: main}, kdims=['Frame'])
ranges = match_spec(range_item.last, ranges)

if dim in ranges:
main_range = ranges[dim]
if dim.name in ranges:
main_range = ranges[dim.name]
else:
framewise = plot.lookup_options(range_item.last, 'norm').options.get('framewise')
if framewise and range_item.get(key, False):
Expand All @@ -279,7 +279,7 @@ def get_sideplot_ranges(plot, element, main, ranges):
range_item = range_item.last
if isinstance(range_item, CompositeOverlay):
range_item = [ov for ov in range_item
if dim in ov.dimensions('all', label=True)][0]
if dim in ov.dimensions('all')][0]
return range_item, main_range, dim


Expand Down
24 changes: 24 additions & 0 deletions tests/testoperation.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,18 +74,42 @@ def test_image_contours_filled(self):
def test_points_histogram(self):
    """
    Run the histogram operation with three bins on ten points and
    compare the result against a hand-built Histogram.
    """
    samples = Points(list(map(float, range(10))))
    result = histogram(samples, num_bins=3)

    # The operation should expose an 'x_frequency' dimension whose
    # label reads 'x Frequency'.
    self.assertEqual(result.get_dimension('x_frequency').label,
                     'x Frequency')

    # The operation's frequency dimension no longer matches the
    # default Element one, so rename it back before comparing.
    renamed = result.redim(x_frequency='Frequency')
    expected = Histogram(([0.1, 0.1, 0.133333], [0, 3, 6, 9]))
    self.assertEqual(renamed, expected)

def test_points_histogram_bin_range(self):
    """
    Run the histogram operation with three bins and an explicit
    bin_range of (0, 3) on ten points and compare the result against
    a hand-built Histogram.
    """
    samples = Points(list(map(float, range(10))))
    result = histogram(samples, num_bins=3, bin_range=(0, 3))

    # The operation should expose an 'x_frequency' dimension whose
    # label reads 'x Frequency'.
    self.assertEqual(result.get_dimension('x_frequency').label,
                     'x Frequency')

    # The operation's frequency dimension no longer matches the
    # default Element one, so rename it back before comparing.
    renamed = result.redim(x_frequency='Frequency')
    expected = Histogram(([0.25, 0.25, 0.5], [0., 1., 2., 3.]))
    self.assertEqual(renamed, expected)

def test_points_histogram_not_normed(self):
    """
    Run the histogram operation with three bins and normed=False on
    ten points and compare the result against a hand-built Histogram.
    """
    samples = Points(list(map(float, range(10))))
    result = histogram(samples, num_bins=3, normed=False)

    # The operation should expose an 'x_frequency' dimension whose
    # label reads 'x Frequency'.
    self.assertEqual(result.get_dimension('x_frequency').label,
                     'x Frequency')

    # The operation's frequency dimension no longer matches the
    # default Element one, so rename it back before comparing.
    renamed = result.redim(x_frequency='Frequency')
    expected = Histogram(([3, 3, 4], [0, 3, 6, 9]))
    self.assertEqual(renamed, expected)

Expand Down

0 comments on commit 2782cfd

Please sign in to comment.