From eefe169343fab4510563431e2bcb0f4ac0a2e427 Mon Sep 17 00:00:00 2001
From: Joseph Crail <jbcrail@gmail.com>
Date: Fri, 23 Jun 2017 14:41:13 -0400
Subject: [PATCH 1/3] Add support for optional edge weights

---
 datashader/bundling.py            | 22 ++++++++++--------
 datashader/tests/test_bundling.py | 38 ++++++++++++++++++-------------
 2 files changed, 35 insertions(+), 25 deletions(-)

diff --git a/datashader/bundling.py b/datashader/bundling.py
index a93d5407c..00dd9f46f 100644
--- a/datashader/bundling.py
+++ b/datashader/bundling.py
@@ -101,7 +101,7 @@ def resample_edge(segments, min_segment_length, max_segment_length):
     change, total_resamples = calculate_length(segments, min_segment_length, max_segment_length)
     if not change:
         return segments
-    resampled = np.empty((total_resamples, 2))
+    resampled = np.empty((total_resamples, 3))
     resample_segment(segments, resampled, min_segment_length, max_segment_length)
     return resampled
 
@@ -166,7 +166,7 @@ def draw_to_surface(edge_segments, bandwidth, accuracy):
     img = np.zeros((accuracy + 1, accuracy + 1))
     for segments in edge_segments:
         for point in segments:
-            img[int(point[0] * accuracy), int(point[1] * accuracy)] += 1
+            img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2]
     return gaussian(img, sigma=bandwidth / 2)
 
 
@@ -188,9 +188,10 @@ def _convert_graph_to_edge_segments(nodes, edges):
     Merge graph dataframes into a list of edge segments.
 
     Given a graph defined as a pair of dataframes (nodes and edges), the
-    nodes (id, coordinates) and edges (id, source, target) are joined by
-    node id to create a single dataframe with each source/target of an
-    edge replaced with the respective coordinates.
+    nodes (id, coordinates) and edges (id, source, target, weight) are
+    joined by node id to create a single dataframe in which each edge's
+    source/target is replaced with the respective coordinates. The edge
+    weight is optional and defaults to 1 when the column is absent.
 
     All node points are normalized to the range (0, 1) using min-max
     scaling.
@@ -209,11 +210,14 @@ def minmax_scale(series):
     df = pd.merge(nodes, df, left_index=True, right_on=['target'])
     df = df.rename(columns={'x': 'dst_x', 'y': 'dst_y'})
 
-    df = df.filter(items=['src_x', 'src_y', 'dst_x', 'dst_y'])
+    if 'weight' not in edges:
+        df['weight'] = 1
+
+    df = df.filter(items=['src_x', 'src_y', 'dst_x', 'dst_y', 'weight'])
 
     edge_segments = []
     for edge in df.get_values():
-        segments = [[edge[0], edge[1]], [edge[2], edge[3]]]
+        segments = [[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]]
         edge_segments.append(np.array(segments))
     return edge_segments
 
@@ -231,10 +235,10 @@ def _convert_edge_segments_to_dataframe(edge_segments):
     def edge_iterator():
         for edge in edge_segments:
             yield edge
-            yield np.array([[np.nan, np.nan]])
+            yield np.array([[np.nan, np.nan, np.nan]])
 
     df = DataFrame(np.concatenate(list(edge_iterator())))
-    df.columns = ['x', 'y']
+    df.columns = ['x', 'y', 'weight']
     return df
 
 
diff --git a/datashader/tests/test_bundling.py b/datashader/tests/test_bundling.py
index 071b5eb1d..02db4fba6 100644
--- a/datashader/tests/test_bundling.py
+++ b/datashader/tests/test_bundling.py
@@ -28,31 +28,37 @@ def edges():
     return edges_df
 
 
-def assert_eq(a, b):
-    assert a.equals(b)
-
-
 def test_directly_connect(nodes, edges):
     # Expect four lines starting at center (0.5, 0.5) and terminating
     # at a different corner and NaN
-    data = pd.DataFrame({'x': [0.5, 0.0, np.nan, 0.5, 1.0, np.nan,
-                               0.5, 0.0, np.nan, 0.5, 1.0, np.nan],
-                         'y': [0.5, 1.0, np.nan, 0.5, 1.0, np.nan,
-                               0.5, 0.0, np.nan, 0.5, 0.0, np.nan]})
-    expected = pd.DataFrame(data)
+    data = pd.DataFrame({'x':
+                            [0.5, 0.0, np.nan, 0.5, 1.0, np.nan,
+                             0.5, 0.0, np.nan, 0.5, 1.0, np.nan],
+                         'y':
+                            [0.5, 1.0, np.nan, 0.5, 1.0, np.nan,
+                             0.5, 0.0, np.nan, 0.5, 0.0, np.nan],
+                         'weight':
+                            [1.0, 1.0, np.nan, 1.0, 1.0, np.nan,
+                             1.0, 1.0, np.nan, 1.0, 1.0, np.nan]})
+    expected = pd.DataFrame(data, columns=['x', 'y', 'weight'])
 
     given = directly_connect_edges(nodes, edges)
-    assert_eq(given, expected)
+    assert given.equals(expected)
 
 
 def test_hammer_bundle(nodes, edges):
     # Expect four lines starting at center (0.5, 0.5) and terminating
     # with NaN
-    data = pd.DataFrame({'x': [0.5, np.nan, 0.5, np.nan,
-                               0.5, np.nan, 0.5, np.nan],
-                         'y': [0.5, np.nan, 0.5, np.nan,
-                               0.5, np.nan, 0.5, np.nan]})
-    expected = pd.DataFrame(data)
+    data = pd.DataFrame({'x':
+                            [0.5, np.nan, 0.5, np.nan,
+                             0.5, np.nan, 0.5, np.nan],
+                         'y':
+                            [0.5, np.nan, 0.5, np.nan,
+                             0.5, np.nan, 0.5, np.nan],
+                         'weight':
+                            [1.0, np.nan, 1.0, np.nan,
+                             1.0, np.nan, 1.0, np.nan]})
+    expected = pd.DataFrame(data, columns=['x', 'y', 'weight'])
 
     df = hammer_bundle(nodes, edges)
 
@@ -62,4 +68,4 @@ def test_hammer_bundle(nodes, edges):
     given.sort_index(inplace=True)
     given.reset_index(drop=True, inplace=True)
 
-    assert_eq(given, expected)
+    assert given.equals(expected)

From dcf7d2f145b7ae6b202f22de3a0795607e29235b Mon Sep 17 00:00:00 2001
From: Joseph Crail <jbcrail@gmail.com>
Date: Fri, 23 Jun 2017 14:43:30 -0400
Subject: [PATCH 2/3] Fix flake8 warnings

---
 datashader/bundling.py | 33 +++++++++++++++------------------
 1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/datashader/bundling.py b/datashader/bundling.py
index 00dd9f46f..ba5f6ca67 100644
--- a/datashader/bundling.py
+++ b/datashader/bundling.py
@@ -242,7 +242,6 @@ def edge_iterator():
     return df
 
 
-
 class directly_connect_edges(param.ParameterizedFunction):
     """
     Convert a graph into paths suitable for datashading.
@@ -255,7 +254,7 @@ class directly_connect_edges(param.ParameterizedFunction):
     def __call__(self, nodes, edges):
         """
         Convert a graph data structure into a path structure for plotting
-        
+
         Given a set of nodes (as a dataframe with a unique ID for each
         node) and a set of edges (as a dataframe with with columns for the
         source and destination IDs for each edge), returns a dataframe
@@ -275,7 +274,7 @@ class hammer_bundle(directly_connect_edges):
     Breaks each edge into a path with multiple line segments, and
     iteratively curves this path to bundle edges into groups.
     """
-    
+
     initial_bandwidth = param.Number(default=0.05,bounds=(0.0,None),doc="""
         Initial value of the bandwidth....""")
 
@@ -288,7 +287,6 @@ class hammer_bundle(directly_connect_edges):
     batch_size = param.Integer(default=20000,bounds=(1,None),doc="""
         Number of edges to process together""")
 
-    
     tension = param.Number(default=0.3,bounds=(0,None),precedence=-0.5,doc="""
         Exponential smoothing factor to use when smoothing""")
 
@@ -304,54 +302,53 @@ class hammer_bundle(directly_connect_edges):
     max_segment_length = param.Number(default=0.016,bounds=(0,None),precedence=-0.5,doc="""
         Maximum length (in data space?) for an edge segment""")
 
-    
     def __call__(self, nodes, edges, **params):
-        p = param.ParamOverrides(self,params)
+        p = param.ParamOverrides(self, params)
 
         # Convert graph into list of edge segments
         edges = _convert_graph_to_edge_segments(nodes, edges)
-    
+
         # This is simply to let the work split out over multiple cores
         edge_batches = list(batches(edges, p.batch_size))
-    
+
         # This gets the edges split into lots of small segments
         # Doing this inside a delayed function lowers the transmission overhead
         edge_segments = [resample_edges(batch, p.min_segment_length, p.max_segment_length) for batch in edge_batches]
-    
+
         for i in range(p.iterations):
             # Each step, the size of the 'blur' shrinks
             bandwidth = p.initial_bandwidth * p.decay**(i + 1) * p.accuracy
-    
+
             # If it's this small, there won't be a change anyway
             if bandwidth < 2:
                 break
-    
+
             # Draw the density maps and combine them
             images = [draw_to_surface(segment, bandwidth, p.accuracy) for segment in edge_segments]
             overall_image = sum(images)
-    
+
             gradients = get_gradients(overall_image)
-    
+
             # Move edges along the gradients and resample when necessary
             # This could include smoothing to adjust the amount a graph can change
             edge_segments = [advect_resample_all(gradients, segment, p.advect_iterations, p.accuracy, p.min_segment_length, p.max_segment_length)
                              for segment in edge_segments]
-    
+
         # Do a final resample to a smaller size for nicer rendering
         edge_segments = [resample_edges(segment, p.min_segment_length, p.max_segment_length) for segment in edge_segments]
-        
+
         # Finally things can be sent for computation
         edge_segments = compute(*edge_segments)
-    
+
         # Smooth out the graph
         for i in range(10):
             for batch in edge_segments:
                 smooth(batch, p.tension)
-    
+
         # Flatten things
         new_segs = []
         for batch in edge_segments:
             new_segs.extend(batch)
-    
+
         # Convert list of edge segments to Pandas dataframe
         return _convert_edge_segments_to_dataframe(new_segs)

From 0a881a3cd09ebefc5e035766fae011324a286759 Mon Sep 17 00:00:00 2001
From: Joseph Crail <jbcrail@gmail.com>
Date: Fri, 23 Jun 2017 16:49:59 -0400
Subject: [PATCH 3/3] Fix future warnings

---
 datashader/tests/test_dask.py   | 26 +++++++++++++-------------
 datashader/tests/test_pandas.py | 24 ++++++++++++------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py
index 0530a6728..27a56b5d9 100644
--- a/datashader/tests/test_dask.py
+++ b/datashader/tests/test_dask.py
@@ -67,18 +67,18 @@ def test_any():
 
 
 def test_sum():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.sum('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.sum('i64')), out)
-    out = xr.DataArray(np.nansum(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nansum(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.sum('f32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.sum('f64')), out)
 
 
 def test_min():
-    out = xr.DataArray(df.i64.reshape((2, 2, 5)).min(axis=2).astype('f8').T,
+    out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).min(axis=2).astype('f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.min('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.min('i64')), out)
@@ -87,7 +87,7 @@ def test_min():
 
 
 def test_max():
-    out = xr.DataArray(df.i64.reshape((2, 2, 5)).max(axis=2).astype('f8').T,
+    out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).max(axis=2).astype('f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.max('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.max('i64')), out)
@@ -96,33 +96,33 @@ def test_max():
 
 
 def test_mean():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.mean('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.mean('i64')), out)
-    out = xr.DataArray(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.mean('f32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.mean('f64')), out)
 
 
 def test_var():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.var('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.var('i64')), out)
-    out = xr.DataArray(np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.var('f32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.var('f64')), out)
 
 
 def test_std():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).std(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).std(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.std('i32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.std('i64')), out)
-    out = xr.DataArray(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(ddf, 'x', 'y', ds.std('f32')), out)
     assert_eq(c.points(ddf, 'x', 'y', ds.std('f64')), out)
@@ -147,9 +147,9 @@ def test_multiple_aggregates():
                               i32_count=ds.count('i32')))
 
     f = lambda x: xr.DataArray(x, coords=coords, dims=dims)
-    assert_eq(agg.f64_std, f(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T))
-    assert_eq(agg.f64_mean, f(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T))
-    assert_eq(agg.i32_sum, f(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T))
+    assert_eq(agg.f64_std, f(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T))
+    assert_eq(agg.f64_mean, f(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T))
+    assert_eq(agg.i32_sum, f(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T))
     assert_eq(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4')))
 
 
diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py
index e84514968..32c1dd1c2 100644
--- a/datashader/tests/test_pandas.py
+++ b/datashader/tests/test_pandas.py
@@ -61,11 +61,11 @@ def test_any():
 
 
 def test_sum():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.sum('i32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.sum('i64')), out)
-    out = xr.DataArray(np.nansum(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nansum(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.sum('f32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.sum('f64')), out)
@@ -81,7 +81,7 @@ def test_min():
 
 
 def test_max():
-    out = xr.DataArray(df.i64.reshape((2, 2, 5)).max(axis=2).astype('f8').T,
+    out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).max(axis=2).astype('f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.max('i32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.max('i64')), out)
@@ -90,33 +90,33 @@ def test_max():
 
 
 def test_mean():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.mean('i32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.mean('i64')), out)
-    out = xr.DataArray(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.mean('f32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.mean('f64')), out)
 
 
 def test_var():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.var('i32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.var('i64')), out)
-    out = xr.DataArray(np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.var('f32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.var('f64')), out)
 
 
 def test_std():
-    out = xr.DataArray(df.i32.reshape((2, 2, 5)).std(axis=2, dtype='f8').T,
+    out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).std(axis=2, dtype='f8').T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.std('i32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.std('i64')), out)
-    out = xr.DataArray(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T,
+    out = xr.DataArray(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T,
                        coords=coords, dims=dims)
     assert_eq(c.points(df, 'x', 'y', ds.std('f32')), out)
     assert_eq(c.points(df, 'x', 'y', ds.std('f64')), out)
@@ -141,9 +141,9 @@ def test_multiple_aggregates():
                               i32_count=ds.count('i32')))
 
     f = lambda x: xr.DataArray(x, coords=coords, dims=dims)
-    assert_eq(agg.f64_std, f(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T))
-    assert_eq(agg.f64_mean, f(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T))
-    assert_eq(agg.i32_sum, f(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T))
+    assert_eq(agg.f64_std, f(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T))
+    assert_eq(agg.f64_mean, f(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T))
+    assert_eq(agg.i32_sum, f(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T))
     assert_eq(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4')))