Notebook updates #131

Merged · 16 commits · Mar 31, 2016

Changes from all commits
9 changes: 7 additions & 2 deletions datashader/pipeline.py
@@ -32,14 +32,18 @@ class Pipeline(object):
color_fn : callable, optional
A callable that takes the output of ``transform_fn``, and returns an
``Image`` object. Default is ``interpolate``.
spread_fn : callable, optional
A callable that takes the output of ``color_fn``, and returns another
``Image`` object. Default is ``dynspread``.
"""
def __init__(self, df, glyph, agg=reductions.count(),
transform_fn=identity, color_fn=tf.interpolate):
transform_fn=identity, color_fn=tf.interpolate, spread_fn=tf.dynspread):
self.df = df
self.glyph = glyph
self.agg = agg
self.transform_fn = transform_fn
self.color_fn = color_fn
self.spread_fn = spread_fn

def __call__(self, x_range=None, y_range=None, width=600, height=600):
"""Compute an image from the specified pipeline.
@@ -55,4 +59,5 @@ def __call__(self, x_range=None, y_range=None, width=600, height=600):
canvas = core.Canvas(plot_width=width, plot_height=height,
x_range=x_range, y_range=y_range)
bins = core.bypixel(self.df, canvas, self.glyph, self.agg)
return self.color_fn(self.transform_fn(bins))
img = self.color_fn(self.transform_fn(bins))
return self.spread_fn(img)
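
For context, a minimal sketch of how the new `spread_fn` stage slots in when invoking a pipeline. The dataframe and ranges here are hypothetical, and passing `tf.dynspread` explicitly is redundant since it is now the default; it is shown only to make the stage visible:

import pandas as pd
import datashader.transfer_functions as tf
from datashader.glyphs import Point
from datashader.pipeline import Pipeline

# Hypothetical points to aggregate.
df = pd.DataFrame({'x': [0.1, 0.4, 0.8], 'y': [0.2, 0.5, 0.9]})

# Stages run in order: aggregate -> transform_fn -> color_fn -> spread_fn.
pipeline = Pipeline(df, Point('x', 'y'), spread_fn=tf.dynspread)
img = pipeline(x_range=(0, 1), y_range=(0, 1), width=300, height=300)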
20 changes: 10 additions & 10 deletions datashader/tests/test_transfer_functions.py
@@ -176,9 +176,9 @@ def test_masks():
np.testing.assert_equal(tf._square_mask(0), np.ones((1, 1), dtype='bool'))
# Circle
np.testing.assert_equal(tf._circle_mask(0), np.ones((1, 1), dtype='bool'))
out = np.array([[0, 1, 0],
out = np.array([[1, 1, 1],
[1, 1, 1],
[0, 1, 0]], dtype='bool')
[1, 1, 1]], dtype='bool')
np.testing.assert_equal(tf._circle_mask(1), out)
out = np.array([[0, 0, 1, 1, 1, 0, 0],
[0, 1, 1, 1, 1, 1, 0],
@@ -203,11 +203,11 @@ def test_spread():
img = tf.Image(data, coords=coords, dims=dims)

s = tf.spread(img)
o = np.array([[0xdc00007d, 0xdc009036, 0x7d00007d, 0x00000000, 0x00000000],
[0xdc009036, 0xdc009036, 0x7d00ff00, 0x00000000, 0x00000000],
[0x7d00007d, 0x7d00ff00, 0x00000000, 0x7dff0000, 0x00000000],
o = np.array([[0xed00863b, 0xed00863b, 0xbc00a82a, 0x00000000, 0x00000000],
[0xed00863b, 0xed00863b, 0xbc00a82a, 0x00000000, 0x00000000],
[0xbc00a82a, 0xbc00a82a, 0xbca85600, 0x7dff0000, 0x7dff0000],
[0x00000000, 0x00000000, 0x7dff0000, 0x7dff0000, 0x7dff0000],
[0x00000000, 0x00000000, 0x00000000, 0x7dff0000, 0x00000000]])
[0x00000000, 0x00000000, 0x7dff0000, 0x7dff0000, 0x7dff0000]])
np.testing.assert_equal(s.data, o)
assert (s.x_axis == img.x_axis).all()
assert (s.y_axis == img.y_axis).all()
@@ -230,11 +230,11 @@ def test_spread():
np.testing.assert_equal(s.data, o)

s = tf.spread(img, how='add')
o = np.array([[0xff0000b7, 0xff007d7a, 0x7d00007d, 0x00000000, 0x00000000],
[0xff007d7a, 0xff007d7a, 0x7d00ff00, 0x00000000, 0x00000000],
[0x7d00007d, 0x7d00ff00, 0x00000000, 0x7dff0000, 0x00000000],
o = np.array([[0xff007db7, 0xff007db7, 0xfa007f3e, 0x00000000, 0x00000000],
[0xff007db7, 0xff007db7, 0xfa007f3e, 0x00000000, 0x00000000],
[0xfa007f3e, 0xfa007f3e, 0xfa7f7f00, 0x7dff0000, 0x7dff0000],
[0x00000000, 0x00000000, 0x7dff0000, 0x7dff0000, 0x7dff0000],
[0x00000000, 0x00000000, 0x00000000, 0x7dff0000, 0x00000000]])
[0x00000000, 0x00000000, 0x7dff0000, 0x7dff0000, 0x7dff0000]])
np.testing.assert_equal(s.data, o)

mask = np.array([[1, 0, 1],
18 changes: 10 additions & 8 deletions datashader/transfer_functions.py
@@ -2,7 +2,8 @@

from io import BytesIO
import warnings

import collections

import numpy as np
import numba as nb
import toolz as tz
@@ -11,7 +12,7 @@


from .colors import rgb
from .composite import composite_op_lookup, source
from .composite import composite_op_lookup, over
from .utils import ngjit


@@ -61,7 +62,7 @@ def stack(*imgs, **kwargs):
return Image(out, coords=imgs[0].coords, dims=imgs[0].dims)


def eq_hist(data, mask=None, nbins=256):
def eq_hist(data, mask=None, nbins=256*256):
"""Return a numpy array after histogram equalization.

For use in `interpolate`.
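
As an aside, the core of histogram equalization can be sketched in a few lines of numpy; the library's own `eq_hist` additionally handles masks and discrete integer data, and the larger `nbins=256*256` default simply reduces quantization error for data with a large dynamic range. A rough sketch of the idea:

import numpy as np

def eq_hist_sketch(data, nbins=256 * 256):
    """Rescale data so equal pixel counts map to equal output ranges."""
    hist, edges = np.histogram(data.ravel(), bins=nbins)
    cdf = hist.cumsum().astype('float64')
    cdf /= cdf[-1]                              # normalize to [0, 1]
    centers = (edges[:-1] + edges[1:]) / 2      # bin midpoints
    return np.interp(data.ravel(), centers, cdf).reshape(data.shape)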
@@ -114,7 +115,7 @@ def _normalize_interpolate_how(how):
raise ValueError("Unknown interpolation method: {0}".format(how))


def interpolate(agg, low=None, high=None, cmap=None, how='cbrt'):
def interpolate(agg, low=None, high=None, cmap=None, how='eq_hist'):
"""Convert a 2D DataArray to an image.

Data is converted to an image either by interpolating between a `low` and
@@ -161,6 +162,8 @@ def interpolate(agg, low=None, high=None, cmap=None, how='cbrt'):
offset = agg.data[agg.data > 0].min()
data = how(agg.data - offset, mask.data)
span = [np.nanmin(data), np.nanmax(data)]
if isinstance(cmap, collections.Iterator):
cmap = list(cmap)
Collaborator:

if isinstance(cmap, collections.Iterator)

Member Author:

That's better, thanks. Done.

if isinstance(cmap, list):
rspan, gspan, bspan = np.array(list(zip(*map(rgb, cmap))))
span = np.linspace(span[0], span[1], len(cmap))
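
A quick usage sketch of what the `Iterator` check enables; the aggregate here is hypothetical:

import numpy as np
import xarray as xr
import datashader.transfer_functions as tf

# Hypothetical 2D aggregate of counts.
agg = xr.DataArray(np.random.randint(0, 100, (5, 5)), dims=['y', 'x'])

# A color iterator (e.g. from a palette generator) is now accepted;
# interpolate() materializes it into a list before use.
img = tf.interpolate(agg, cmap=iter(['lightblue', 'darkblue']))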
@@ -180,7 +183,7 @@ def interpolate(agg, low=None, high=None, cmap=None, how='cbrt'):
return Image(img, coords=agg.coords, dims=agg.dims)


def colorize(agg, color_key, how='cbrt', min_alpha=20):
def colorize(agg, color_key, how='eq_hist', min_alpha=20):
"""Color a CategoricalAggregate by field.

Parameters
@@ -245,7 +248,7 @@ def set_background(img, color=None):
if color is None:
return img
background = np.uint8(rgb(color) + (255,)).view('uint32')[0]
data = source(img.data, background)
data = over(img.data, background)
Collaborator:

Why did you change this? This doesn't set the background, it overlays an image over a background of that color. For images that use alpha to indicate magnitude (output of colorize), this will make all set pixels have full alpha. I'm unsure if this is desired here.

Member Author:

That's correct, and intended. The problem was that previously, if one tried to set the background to black, it only changed the fully transparent pixels, which had very strange results -- if you take such an image and view it in e.g. Preview on a Mac, it's all garbled: the fully transparent pixels are black, but Preview's default gray background shines through (to a greater or lesser extent, depending on alpha) all the others. So I don't think that set_background was doing something useful before; changing only the fully transparent pixels while removing their transparency yet leaving other pixels transparent doesn't result in a usable image in any scenario I can think of, and certainly not in the use cases I had in mind for set_background.

Member Author:

On second look, I think I see what you're saying here, but I don't think it's true. I.e., you're worried that the alpha channel will simply be discarded from the top image? The code isn't discarding the src alpha, as far as I can see; it's using it to control how the src gets mixed with the background, which is what we want here. Try comparing census.ipynb with set_background using over and source for comparison, and you should see what I mean...

return Image(data, coords=img.coords, dims=img.dims)
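
For reference on the discussion above, `over` is the standard Porter-Duff compositing operator. A rough numpy sketch of the math on straight-alpha floats (datashader's actual implementation works on packed 32-bit RGBA integers and is numba-compiled):

import numpy as np

def over_sketch(src, dst):
    """Porter-Duff 'over': src composited on top of dst.
    Both arrays are (..., 4) RGBA with straight alpha in [0, 1]."""
    src_rgb, src_a = src[..., :3], src[..., 3:4]
    dst_rgb, dst_a = dst[..., :3], dst[..., 3:4]
    out_a = src_a + dst_a * (1 - src_a)
    safe = np.where(out_a == 0, 1, out_a)   # avoid 0/0 for empty pixels
    out_rgb = (src_rgb * src_a + dst_rgb * dst_a * (1 - src_a)) / safe
    return np.concatenate([out_rgb, out_a], axis=-1)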


@@ -326,8 +329,7 @@ def _square_mask(px):
def _circle_mask(r):
"""Produce a circular mask with a diameter of ``2 * r + 1``"""
x = np.arange(-r, r + 1, dtype='i4')
bound = r + 0.5 if r > 1 else r
return np.where(np.sqrt(x**2 + x[:, None]**2) <= bound, True, False)
return np.where(np.sqrt(x**2 + x[:, None]**2) <= r+0.5, True, False)
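
A quick check of the new behavior, matching the updated `test_masks` expectation above: with the `r + 0.5` bound applied uniformly, the radius-1 mask is now a full 3x3 block, since the corner distance sqrt(2) ≈ 1.414 is within 1.5:

import numpy as np

x = np.arange(-1, 2, dtype='i4')             # offsets -1, 0, 1
mask = np.sqrt(x**2 + x[:, None]**2) <= 1.5  # r + 0.5 with r = 1
print(mask.astype(int))
# [[1 1 1]
#  [1 1 1]
#  [1 1 1]]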


_mask_lookup = {'square': _square_mask,
140 changes: 123 additions & 17 deletions examples/census.ipynb
@@ -59,7 +59,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"The output of `.tail()` shows that there are more than 300 million datapoints (one per person), each with a location in Web Mercator format, and that the race for each datapoint has been encoded as a single character (where 'w' is white, 'b' is black, 'a' is Asian, 'h' is Hispanic, and 'o' is other (typically Native American).\n",
"The output of `.tail()` shows that there are more than 300 million datapoints (one per person), each with a location in Web Mercator format, and that the race for each datapoint has been encoded as a single character (where 'w' is white, 'b' is black, 'a' is Asian, 'h' is Hispanic, and 'o' is other (typically Native American)).\n",
"\n",
"Let's define some geographic ranges to look at later, and also a default plot size. Feel free to increase `plot_width` to 2000 or more if you have a very large monitor or want to save files to disk, which shouldn't *greatly* affect the processing time or memory requirements. "
]
@@ -72,10 +72,17 @@
},
"outputs": [],
"source": [
"USA = ((-13884029, -7453304), (2698291, 6455972))\n",
"LakeMichigan = ((-10206131, -9348029), (4975642, 5477059))\n",
"Chicago = (( -9828281, -9717659), (5096658, 5161298))\n",
"Chinatown = (( -9759210, -9754583), (5137122, 5139825))\n",
"USA = ((-13884029, -7453304), (2698291, 6455972))\n",
"LakeMichigan = ((-10206131, -9348029), (4975642, 5477059))\n",
"Chicago = (( -9828281, -9717659), (5096658, 5161298))\n",
"Chinatown = (( -9759210, -9754583), (5137122, 5139825))\n",
"\n",
"NewYorkCity = (( -8280656, -8175066), (4940514, 4998954))\n",
"LosAngeles = ((-13195052, -13114944), (3979242, 4023720))\n",
"Houston = ((-10692703, -10539441), (3432521, 3517616))\n",
"Austin = ((-10898752, -10855820), (3525750, 3550837))\n",
"NewOrleans = ((-10059963, -10006348), (3480787, 3510555))\n",
"Atlanta = (( -9448349, -9354773), (3955797, 4007753))\n",
"\n",
"x_range,y_range = USA\n",
"\n",
@@ -101,9 +108,6 @@
"black_background = True\n",
"\n",
"from IPython.core.display import HTML, display\n",
"if black_background:\n",
" display(HTML(\"<style>.output_result { background-color:black !important; color:white }</style>\"))\n",
"\n",
"display(HTML(\"<style>.container { width:100% !important; }</style>\"))"
]
},
@@ -124,6 +128,8 @@
"source": [
"def export(img,filename,fmt=\".png\",_return=True):\n",
" \"\"\"Given a datashader Image object, saves it to a disk file in the requested format\"\"\"\n",
" if black_background: \n",
" img=tf.set_background(img,\"black\")\n",
" img.to_pil().save(filename+fmt)\n",
" return img if _return else None\n",
"\n",
@@ -236,9 +242,9 @@
"source": [
"Suddenly, we can see an amazing amount of structure! There are clearly meaningful patterns at nearly every location, ranging from the geographic variations in the mountainous West, to the densely spaced urban centers in New England, and the many towns stretched out along roadsides in the midwest (especially those leading to Denver, the hot spot towards the right of the Rocky Mountains). \n",
"\n",
"Clearly, we can now see much more of what's going on in this dataset, thanks to the logarithmic mapping. Yet the choice of `'log'` was purely arbitrary, and one could easily imagine that other nonlinear functions would show other interesting patterns. Instead of blindly searching through the space of all such functions, we can step back and notice that the main effect of the log transform has been to reveal *local* patterns at all population densities -- urban areas show up clearly even if they are just slightly more dense than their immediate, rural neighbors, yet they still show up as denser areas in more populated regions.\n",
"Clearly, we can now see much more of what's going on in this dataset, thanks to the logarithmic mapping. Yet the choice of `'log'` was purely arbitrary, and one could easily imagine that other nonlinear functions would show other interesting patterns. Instead of blindly searching through the space of all such functions, we can step back and notice that the main effect of the log transform has been to reveal *local* patterns at all population densities -- small towns show up clearly even if they are just slightly more dense than their immediate, rural neighbors, yet large cities with high population density also show up well against the surrounding suburban regions, even if those regions are more dense than the small towns on an absolute scale.\n",
"\n",
"With this in mind, let's try the image-processing technique called histogram equalization. I.e., given a set of raw counts, map these into a range for display such that every available color on the screen represents about the same number of samples in the original dataset. The result is similar to that from the log transform, but is now non-parametric -- it will equalize any linearly or nonlinearly distributed integer data, regardless of the distribution:"
"With this idea of showing relative differences across a large range of data values in mind, let's try the image-processing technique called histogram equalization. I.e., given a set of raw counts, map these into a range for display such that every available color on the screen represents about the same number of samples in the original dataset. The result is similar to that from the log transform, but is now non-parametric -- it will equalize any linearly or nonlinearly distributed integer data, regardless of the distribution:"
]
},
{
Expand Down Expand Up @@ -271,7 +277,7 @@
},
"outputs": [],
"source": [
"print(Hot)\n",
"print(cm(Hot,0.2))\n",
"export(tf.interpolate(agg, cmap = cm(Hot,0.2), how='eq_hist'),\"census_ds_hot_eq_hist\")"
]
},
@@ -309,7 +315,7 @@
},
"outputs": [],
"source": [
"export(tf.interpolate(agg, cmap=cm(viridis), how='eq_hist'),\"census_viridis_eq_hist.png\")"
"export(tf.interpolate(agg, cmap=cm(viridis), how='eq_hist'),\"census_viridis_eq_hist\")"
]
},
{
@@ -443,7 +449,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Eventually, we can zoom in far enough to see individual datapoints, which we make more visible here using the `tf.spread` function to enlarge each point to cover multiple pixels. Here we can see that the Chinatown region of Chicago has, as expected, very high numbers of Asian residents, and that other nearby regions (separated by features like roads and highways) have other races, varying in how uniformly segregated they are:"
"Eventually, we can zoom in far enough to see individual datapoints. Here we can see that the Chinatown region of Chicago has, as expected, very high numbers of Asian residents, and that other nearby regions (separated by features like roads and highways) have other races, varying in how uniformly segregated they are:"
]
},
{
@@ -457,13 +463,112 @@
"export(tf.spread(create_image(*Chinatown),px=plot_width/400),\"Zoom 3 - Chinatown\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note that we've used the `tf.spread` function to enlarge each point to cover multiple pixels so that each point is clearly visible. Instead of the default circular spreading, you could choose `shape='square'` if you prefer, or any mask shape, e.g.:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"mask = np.array([[1, 1, 1, 1, 1],\n",
" [1, 0, 0, 0, 1],\n",
" [1, 0, 0, 0, 1],\n",
" [1, 0, 0, 0, 1],\n",
" [1, 1, 1, 1, 1]])\n",
"\n",
"export(tf.spread(create_image(*Chinatown), mask=mask),\"Chinatown outlines\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Other cities, for comparison\n",
"\n",
"Different cities have very different racial makeup, but they all appear highly segregated:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*NewYorkCity),\"NYC\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*LosAngeles),\"LosAngeles\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*Houston),\"Houston\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*Atlanta),\"Atlanta\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*NewOrleans),\"NewOrleans\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"export(create_image(*Austin),\"Austin\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Analyzing racial data through visualization\n",
"\n",
"Now that we have categorical data, we can break it down and ask specific questions. For instance, if we switch back to the full USA and then select only the black population, we can see that blacks predominantly reside in urban areas except in the South and the East Coast:"
"In addition to simply visualizing categorical data, we can break it down and ask specific questions. For instance, if we switch back to the full USA and then select only the black population, we can see that blacks predominantly reside in urban areas except in the South and the East Coast:"
]
},
{
@@ -477,7 +582,7 @@
"cvs = ds.Canvas(plot_width=plot_width, plot_height=plot_height)\n",
"agg = cvs.points(df, 'meterswest', 'metersnorth', ds.count_cat('race'))\n",
"\n",
"tf.interpolate(agg.sel(race='b'), cmap=cm(Greys9,0.25), how='eq_hist')"
"export(tf.interpolate(agg.sel(race='b'), cmap=cm(Greys9,0.25), how='eq_hist'),\"USA blacks\")"
]
},
{
@@ -497,7 +602,8 @@
},
"outputs": [],
"source": [
"tf.colorize(agg.where((agg.sel(race=['w', 'b', 'a', 'h']) > 0).all(dim='race')).fillna(0), color_key, how='eq_hist')"
"agg2 = agg.where((agg.sel(race=['w', 'b', 'a', 'h']) > 0).all(dim='race')).fillna(0)\n",
"export(tf.colorize(agg2, color_key, how='eq_hist'),\"USA all\")"
]
},
{
@@ -517,7 +623,7 @@
},
"outputs": [],
"source": [
"tf.colorize(agg.where(agg.sel(race='w') < agg.sel(race='b')).fillna(0), color_key, how='eq_hist')"
"export(tf.colorize(agg.where(agg.sel(race='w') < agg.sel(race='b')).fillna(0), color_key, how='eq_hist'),\"more_blacks\")"
]
},
{