diff --git a/holoviews/operation/element.py b/holoviews/operation/element.py index 8213499e13..0c7b651685 100644 --- a/holoviews/operation/element.py +++ b/holoviews/operation/element.py @@ -2,6 +2,7 @@ Collection of either extremely generic or simple Operation examples. """ +from __future__ import division import numpy as np @@ -11,7 +12,8 @@ from ..core import (Operation, NdOverlay, Overlay, GridMatrix, HoloMap, Dataset, Element, Collator, Dimension) from ..core.data import ArrayInterface, DictInterface -from ..core.util import find_minmax, group_sanitizer, label_sanitizer, pd, basestring +from ..core.util import (find_minmax, group_sanitizer, label_sanitizer, pd, + basestring, datetime_types) from ..element.chart import Histogram, Scatter from ..element.raster import Raster, Image, RGB, QuadMesh from ..element.path import Contours, Polygons @@ -482,6 +484,9 @@ class histogram(Operation): dimension = param.String(default=None, doc=""" Along which dimension of the Element to compute the histogram.""") + frequency_label = param.String(default='{dim} Frequency', doc=""" + Format string defining the label of the frequency dimension of the Histogram.""") + groupby = param.ClassSelector(default=None, class_=(basestring, Dimension), doc=""" Defines a dimension to group the Histogram returning an NdOverlay of Histograms.""") @@ -494,8 +499,14 @@ class histogram(Operation): mean_weighted = param.Boolean(default=False, doc=""" Whether the weighted frequencies are averaged.""") - normed = param.Boolean(default=True, doc=""" - Whether the histogram frequencies are normalized.""") + normed = param.ObjectSelector(default=True, + objects=[True, False, 'integral', 'height'], + doc=""" + Controls normalization behavior. If `True` or `'integral'`, then + `density=True` is passed to np.histogram, and the distribution + is normalized such that the integral is unity. If `False`, + then the frequencies will be raw counts. If `'height'`, then the + frequencies are normalized such that the max bin height is unity.""") nonzero = param.Boolean(default=False, doc=""" Whether to use only nonzero values when computing the histogram""") @@ -531,16 +542,12 @@ def _process(self, view, key=None): weights = weights[mask] else: weights = None - try: - hist_range = find_minmax((np.nanmin(data), np.nanmax(data)), (0, -float('inf')))\ - if self.p.bin_range is None else self.p.bin_range - except ValueError: - hist_range = (0, 1) + data = data[np.isfinite(data)] + hist_range = self.p.bin_range or view.range(selected_dim) # Avoids range issues including zero bin range and empty bins - if hist_range == (0, 0): + if hist_range == (0, 0) or any(not np.isfinite(r) for r in hist_range): hist_range = (0, 1) - data = data[np.invert(np.isnan(data))] if self.p.log: bin_min = max([abs(hist_range[0]), data[data>0].min()]) edges = np.logspace(np.log10(bin_min), np.log10(hist_range[1]), @@ -549,14 +556,20 @@ def _process(self, view, key=None): edges = np.linspace(hist_range[0], hist_range[1], self.p.num_bins + 1) normed = False if self.p.mean_weighted and self.p.weight_dimension else self.p.normed - data = data[np.isfinite(data)] if len(data): - hist, edges = np.histogram(data, normed=normed, range=hist_range, - weights=weights, bins=edges) - if not normed and self.p.weight_dimension and self.p.mean_weighted: - hist_mean, _ = np.histogram(data, normed=normed, - range=hist_range, bins=self.p.num_bins) - hist /= hist_mean + if normed: + # This covers True, 'height', 'integral' + hist, edges = np.histogram(data, density=True, range=hist_range, + weights=weights, bins=edges) + if normed=='height': + hist /= hist.max() + else: + hist, edges = np.histogram(data, normed=normed, range=hist_range, + weights=weights, bins=edges) + if self.p.weight_dimension and self.p.mean_weighted: + hist_mean, _ = np.histogram(data, density=False, range=hist_range, + bins=self.p.num_bins) + hist /= hist_mean else: hist = np.zeros(self.p.num_bins) hist[np.isnan(hist)] = 0 @@ -564,6 +577,11 @@ def _process(self, view, key=None): params = {} if self.p.weight_dimension: params['vdims'] = [view.get_dimension(self.p.weight_dimension)] + else: + label = self.p.frequency_label.format(dim=selected_dim) + params['vdims'] = [Dimension('{}_frequency'.format(selected_dim), + label=label)] + if view.group != view.__class__.__name__: params['group'] = view.group @@ -571,7 +589,6 @@ def _process(self, view, key=None): label=view.label, **params) - class decimate(Operation): """ Decimates any column based Element to a specified number of random @@ -765,8 +782,10 @@ def _process(self, p, element, ranges={}): el_data = element.data # Get dimensions to plot against each other + types = (str, basestring, np.str_, np.object_)+datetime_types dims = [d for d in element.dimensions() - if _is_number(element.range(d)[0])] + if _is_number(element.range(d)[0]) and + not issubclass(element.get_dimension_type(d), types)] permuted_dims = [(d1, d2) for d1 in dims for d2 in dims[::-1]] @@ -787,10 +806,7 @@ def _process(self, p, element, ranges={}): el = p.diagonal_type(values, kdims=[d1]) elif p.diagonal_operation is histogram or isinstance(p.diagonal_operation, histogram): bin_range = ranges.get(d1.name, element.range(d1)) - opts = dict(axiswise=True, framewise=True) - el = p.diagonal_operation(element, - dimension=d1.name, - bin_range=bin_range).opts(norm=opts) + el = p.diagonal_operation(element, dimension=d1.name, bin_range=bin_range) else: el = p.diagonal_operation(element, dimension=d1.name) else: diff --git a/holoviews/plotting/bokeh/chart.py b/holoviews/plotting/bokeh/chart.py index 7dd0b25e69..05d330b4a2 100644 --- a/holoviews/plotting/bokeh/chart.py +++ b/holoviews/plotting/bokeh/chart.py @@ -376,7 +376,8 @@ def get_data(self, element, ranges, style): data = dict(top=element.values, left=element.edges[:-1], right=element.edges[1:]) - color_dims = self.adjoined.traverse(lambda x: x.handles.get('color_dim')) + color_dims = [d for d in self.adjoined.traverse(lambda x: x.handles.get('color_dim')) + if d is not None] dim = color_dims[0] if color_dims else None cmapper = self._get_colormapper(dim, element, {}, {}) if cmapper and dim in element.dimensions(): diff --git a/holoviews/plotting/util.py b/holoviews/plotting/util.py index d194a1e57b..a4155ae3db 100644 --- a/holoviews/plotting/util.py +++ b/holoviews/plotting/util.py @@ -254,19 +254,19 @@ def get_sideplot_ranges(plot, element, main, ranges): dictionary of ranges. """ key = plot.current_key - dims = element.dimensions(label=True) - dim = dims[1] if dims[1] != 'Frequency' else dims[0] + dims = element.dimensions() + dim = dims[0] if 'frequency' in dims[1].name else dims[1] range_item = main if isinstance(main, HoloMap): if issubclass(main.type, CompositeOverlay): range_item = [hm for hm in main.split_overlays()[1] - if dim in hm.dimensions('all', label=True)][0] + if dim in hm.dimensions('all')][0] else: range_item = HoloMap({0: main}, kdims=['Frame']) ranges = match_spec(range_item.last, ranges) - if dim in ranges: - main_range = ranges[dim] + if dim.name in ranges: + main_range = ranges[dim.name] else: framewise = plot.lookup_options(range_item.last, 'norm').options.get('framewise') if framewise and range_item.get(key, False): @@ -279,7 +279,7 @@ def get_sideplot_ranges(plot, element, main, ranges): range_item = range_item.last if isinstance(range_item, CompositeOverlay): range_item = [ov for ov in range_item - if dim in ov.dimensions('all', label=True)][0] + if dim in ov.dimensions('all')][0] return range_item, main_range, dim diff --git a/tests/testoperation.py b/tests/testoperation.py index fe5aae3cf4..6418652c53 100644 --- a/tests/testoperation.py +++ b/tests/testoperation.py @@ -74,18 +74,42 @@ def test_image_contours_filled(self): def test_points_histogram(self): points = Points([float(i) for i in range(10)]) op_hist = histogram(points, num_bins=3) + + # Make sure that the name and label are as desired + op_freq_dim = op_hist.get_dimension('x_frequency') + self.assertEqual(op_freq_dim.label, 'x Frequency') + + # Because the operation labels are now different from the + # default Element label, change back before comparing. + op_hist = op_hist.redim(x_frequency='Frequency') hist = Histogram(([0.1, 0.1, 0.133333], [0, 3, 6, 9])) self.assertEqual(op_hist, hist) def test_points_histogram_bin_range(self): points = Points([float(i) for i in range(10)]) op_hist = histogram(points, num_bins=3, bin_range=(0, 3)) + + # Make sure that the name and label are as desired + op_freq_dim = op_hist.get_dimension('x_frequency') + self.assertEqual(op_freq_dim.label, 'x Frequency') + + # Because the operation labels are now different from the + # default Element label, change back before comparing. + op_hist = op_hist.redim(x_frequency='Frequency') hist = Histogram(([0.25, 0.25, 0.5], [0., 1., 2., 3.])) self.assertEqual(op_hist, hist) def test_points_histogram_not_normed(self): points = Points([float(i) for i in range(10)]) op_hist = histogram(points, num_bins=3, normed=False) + + # Make sure that the name and label are as desired + op_freq_dim = op_hist.get_dimension('x_frequency') + self.assertEqual(op_freq_dim.label, 'x Frequency') + + # Because the operation labels are now different from the + # default Element label, change back before comparing. + op_hist = op_hist.redim(x_frequency='Frequency') hist = Histogram(([3, 3, 4], [0, 3, 6, 9])) self.assertEqual(op_hist, hist)