From eefe169343fab4510563431e2bcb0f4ac0a2e427 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Fri, 23 Jun 2017 14:41:13 -0400 Subject: [PATCH 1/3] Add support for optional edge weights --- datashader/bundling.py | 22 ++++++++++-------- datashader/tests/test_bundling.py | 38 ++++++++++++++++++------------- 2 files changed, 35 insertions(+), 25 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index a93d5407c..00dd9f46f 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -101,7 +101,7 @@ def resample_edge(segments, min_segment_length, max_segment_length): change, total_resamples = calculate_length(segments, min_segment_length, max_segment_length) if not change: return segments - resampled = np.empty((total_resamples, 2)) + resampled = np.empty((total_resamples, 3)) resample_segment(segments, resampled, min_segment_length, max_segment_length) return resampled @@ -166,7 +166,7 @@ def draw_to_surface(edge_segments, bandwidth, accuracy): img = np.zeros((accuracy + 1, accuracy + 1)) for segments in edge_segments: for point in segments: - img[int(point[0] * accuracy), int(point[1] * accuracy)] += 1 + img[int(point[0] * accuracy), int(point[1] * accuracy)] += point[2] return gaussian(img, sigma=bandwidth / 2) @@ -188,9 +188,10 @@ def _convert_graph_to_edge_segments(nodes, edges): Merge graph dataframes into a list of edge segments. Given a graph defined as a pair of dataframes (nodes and edges), the - nodes (id, coordinates) and edges (id, source, target) are joined by - node id to create a single dataframe with each source/target of an - edge replaced with the respective coordinates. + nodes (id, coordinates) and edges (id, source, target, weight) are + joined by node id to create a single dataframe with each source/target + of an edge (including its optional weight) replaced with the respective + coordinates. All node points are normalized to the range (0, 1) using min-max scaling. @@ -209,11 +210,14 @@ def minmax_scale(series): df = pd.merge(nodes, df, left_index=True, right_on=['target']) df = df.rename(columns={'x': 'dst_x', 'y': 'dst_y'}) - df = df.filter(items=['src_x', 'src_y', 'dst_x', 'dst_y']) + if 'weight' not in edges: + df['weight'] = 1 + + df = df.filter(items=['src_x', 'src_y', 'dst_x', 'dst_y', 'weight']) edge_segments = [] for edge in df.get_values(): - segments = [[edge[0], edge[1]], [edge[2], edge[3]]] + segments = [[edge[0], edge[1], edge[4]], [edge[2], edge[3], edge[4]]] edge_segments.append(np.array(segments)) return edge_segments @@ -231,10 +235,10 @@ def _convert_edge_segments_to_dataframe(edge_segments): def edge_iterator(): for edge in edge_segments: yield edge - yield np.array([[np.nan, np.nan]]) + yield np.array([[np.nan, np.nan, np.nan]]) df = DataFrame(np.concatenate(list(edge_iterator()))) - df.columns = ['x', 'y'] + df.columns = ['x', 'y', 'weight'] return df diff --git a/datashader/tests/test_bundling.py b/datashader/tests/test_bundling.py index 071b5eb1d..02db4fba6 100644 --- a/datashader/tests/test_bundling.py +++ b/datashader/tests/test_bundling.py @@ -28,31 +28,37 @@ def edges(): return edges_df -def assert_eq(a, b): - assert a.equals(b) - - def test_directly_connect(nodes, edges): # Expect four lines starting at center (0.5, 0.5) and terminating # at a different corner and NaN - data = pd.DataFrame({'x': [0.5, 0.0, np.nan, 0.5, 1.0, np.nan, - 0.5, 0.0, np.nan, 0.5, 1.0, np.nan], - 'y': [0.5, 1.0, np.nan, 0.5, 1.0, np.nan, - 0.5, 0.0, np.nan, 0.5, 0.0, np.nan]}) - expected = pd.DataFrame(data) + data = pd.DataFrame({'x': + [0.5, 0.0, np.nan, 0.5, 1.0, np.nan, + 0.5, 0.0, np.nan, 0.5, 1.0, np.nan], + 'y': + [0.5, 1.0, np.nan, 0.5, 1.0, np.nan, + 0.5, 0.0, np.nan, 0.5, 0.0, np.nan], + 'weight': + [1.0, 1.0, np.nan, 1.0, 1.0, np.nan, + 1.0, 1.0, np.nan, 1.0, 1.0, np.nan]}) + expected = pd.DataFrame(data, columns=['x', 'y', 'weight']) given = directly_connect_edges(nodes, edges) - assert_eq(given, expected) + assert given.equals(expected) def test_hammer_bundle(nodes, edges): # Expect four lines starting at center (0.5, 0.5) and terminating # with NaN - data = pd.DataFrame({'x': [0.5, np.nan, 0.5, np.nan, - 0.5, np.nan, 0.5, np.nan], - 'y': [0.5, np.nan, 0.5, np.nan, - 0.5, np.nan, 0.5, np.nan]}) - expected = pd.DataFrame(data) + data = pd.DataFrame({'x': + [0.5, np.nan, 0.5, np.nan, + 0.5, np.nan, 0.5, np.nan], + 'y': + [0.5, np.nan, 0.5, np.nan, + 0.5, np.nan, 0.5, np.nan], + 'weight': + [1.0, np.nan, 1.0, np.nan, + 1.0, np.nan, 1.0, np.nan]}) + expected = pd.DataFrame(data, columns=['x', 'y', 'weight']) df = hammer_bundle(nodes, edges) @@ -62,4 +68,4 @@ def test_hammer_bundle(nodes, edges): given.sort_index(inplace=True) given.reset_index(drop=True, inplace=True) - assert_eq(given, expected) + assert given.equals(expected) From dcf7d2f145b7ae6b202f22de3a0795607e29235b Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Fri, 23 Jun 2017 14:43:30 -0400 Subject: [PATCH 2/3] Fix flake8 warnings --- datashader/bundling.py | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/datashader/bundling.py b/datashader/bundling.py index 00dd9f46f..ba5f6ca67 100644 --- a/datashader/bundling.py +++ b/datashader/bundling.py @@ -242,7 +242,6 @@ def edge_iterator(): return df - class directly_connect_edges(param.ParameterizedFunction): """ Convert a graph into paths suitable for datashading. @@ -255,7 +254,7 @@ class directly_connect_edges(param.ParameterizedFunction): def __call__(self, nodes, edges): """ Convert a graph data structure into a path structure for plotting - + Given a set of nodes (as a dataframe with a unique ID for each node) and a set of edges (as a dataframe with with columns for the source and destination IDs for each edge), returns a dataframe @@ -275,7 +274,7 @@ class hammer_bundle(directly_connect_edges): Breaks each edge into a path with multiple line segments, and iteratively curves this path to bundle edges into groups. """ - + initial_bandwidth = param.Number(default=0.05,bounds=(0.0,None),doc=""" Initial value of the bandwidth....""") @@ -288,7 +287,6 @@ class hammer_bundle(directly_connect_edges): batch_size = param.Integer(default=20000,bounds=(1,None),doc=""" Number of edges to process together""") - tension = param.Number(default=0.3,bounds=(0,None),precedence=-0.5,doc=""" Exponential smoothing factor to use when smoothing""") @@ -304,54 +302,53 @@ class hammer_bundle(directly_connect_edges): max_segment_length = param.Number(default=0.016,bounds=(0,None),precedence=-0.5,doc=""" Maximum length (in data space?) for an edge segment""") - def __call__(self, nodes, edges, **params): - p = param.ParamOverrides(self,params) + p = param.ParamOverrides(self, params) # Convert graph into list of edge segments edges = _convert_graph_to_edge_segments(nodes, edges) - + # This is simply to let the work split out over multiple cores edge_batches = list(batches(edges, p.batch_size)) - + # This gets the edges split into lots of small segments # Doing this inside a delayed function lowers the transmission overhead edge_segments = [resample_edges(batch, p.min_segment_length, p.max_segment_length) for batch in edge_batches] - + for i in range(p.iterations): # Each step, the size of the 'blur' shrinks bandwidth = p.initial_bandwidth * p.decay**(i + 1) * p.accuracy - + # If it's this small, there won't be a change anyway if bandwidth < 2: break - + # Draw the density maps and combine them images = [draw_to_surface(segment, bandwidth, p.accuracy) for segment in edge_segments] overall_image = sum(images) - + gradients = get_gradients(overall_image) - + # Move edges along the gradients and resample when necessary # This could include smoothing to adjust the amount a graph can change edge_segments = [advect_resample_all(gradients, segment, p.advect_iterations, p.accuracy, p.min_segment_length, p.max_segment_length) for segment in edge_segments] - + # Do a final resample to a smaller size for nicer rendering edge_segments = [resample_edges(segment, p.min_segment_length, p.max_segment_length) for segment in edge_segments] - + # Finally things can be sent for computation edge_segments = compute(*edge_segments) - + # Smooth out the graph for i in range(10): for batch in edge_segments: smooth(batch, p.tension) - + # Flatten things new_segs = [] for batch in edge_segments: new_segs.extend(batch) - + # Convert list of edge segments to Pandas dataframe return _convert_edge_segments_to_dataframe(new_segs) From 0a881a3cd09ebefc5e035766fae011324a286759 Mon Sep 17 00:00:00 2001 From: Joseph Crail Date: Fri, 23 Jun 2017 16:49:59 -0400 Subject: [PATCH 3/3] Fix future warnings --- datashader/tests/test_dask.py | 26 +++++++++++++------------- datashader/tests/test_pandas.py | 24 ++++++++++++------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/datashader/tests/test_dask.py b/datashader/tests/test_dask.py index 0530a6728..27a56b5d9 100644 --- a/datashader/tests/test_dask.py +++ b/datashader/tests/test_dask.py @@ -67,18 +67,18 @@ def test_any(): def test_sum(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.sum('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.sum('i64')), out) - out = xr.DataArray(np.nansum(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nansum(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.sum('f32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.sum('f64')), out) def test_min(): - out = xr.DataArray(df.i64.reshape((2, 2, 5)).min(axis=2).astype('f8').T, + out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).min(axis=2).astype('f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.min('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.min('i64')), out) @@ -87,7 +87,7 @@ def test_min(): def test_max(): - out = xr.DataArray(df.i64.reshape((2, 2, 5)).max(axis=2).astype('f8').T, + out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).max(axis=2).astype('f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.max('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.max('i64')), out) @@ -96,33 +96,33 @@ def test_max(): def test_mean(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.mean('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.mean('i64')), out) - out = xr.DataArray(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.mean('f32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.mean('f64')), out) def test_var(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).var(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.var('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.var('i64')), out) - out = xr.DataArray(np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.var('f32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.var('f64')), out) def test_std(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).std(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).std(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.std('i32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.std('i64')), out) - out = xr.DataArray(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(ddf, 'x', 'y', ds.std('f32')), out) assert_eq(c.points(ddf, 'x', 'y', ds.std('f64')), out) @@ -147,9 +147,9 @@ def test_multiple_aggregates(): i32_count=ds.count('i32'))) f = lambda x: xr.DataArray(x, coords=coords, dims=dims) - assert_eq(agg.f64_std, f(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T)) - assert_eq(agg.f64_mean, f(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T)) - assert_eq(agg.i32_sum, f(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T)) + assert_eq(agg.f64_std, f(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T)) + assert_eq(agg.f64_mean, f(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T)) + assert_eq(agg.i32_sum, f(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T)) assert_eq(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4'))) diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index e84514968..32c1dd1c2 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -61,11 +61,11 @@ def test_any(): def test_sum(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.sum('i32')), out) assert_eq(c.points(df, 'x', 'y', ds.sum('i64')), out) - out = xr.DataArray(np.nansum(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nansum(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.sum('f32')), out) assert_eq(c.points(df, 'x', 'y', ds.sum('f64')), out) @@ -81,7 +81,7 @@ def test_min(): def test_max(): - out = xr.DataArray(df.i64.reshape((2, 2, 5)).max(axis=2).astype('f8').T, + out = xr.DataArray(df.i64.values.reshape((2, 2, 5)).max(axis=2).astype('f8').T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.max('i32')), out) assert_eq(c.points(df, 'x', 'y', ds.max('i64')), out) @@ -90,33 +90,33 @@ def test_max(): def test_mean(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).mean(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.mean('i32')), out) assert_eq(c.points(df, 'x', 'y', ds.mean('i64')), out) - out = xr.DataArray(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.mean('f32')), out) assert_eq(c.points(df, 'x', 'y', ds.mean('f64')), out) def test_var(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).var(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).var(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.var('i32')), out) assert_eq(c.points(df, 'x', 'y', ds.var('i64')), out) - out = xr.DataArray(np.nanvar(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanvar(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.var('f32')), out) assert_eq(c.points(df, 'x', 'y', ds.var('f64')), out) def test_std(): - out = xr.DataArray(df.i32.reshape((2, 2, 5)).std(axis=2, dtype='f8').T, + out = xr.DataArray(df.i32.values.reshape((2, 2, 5)).std(axis=2, dtype='f8').T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.std('i32')), out) assert_eq(c.points(df, 'x', 'y', ds.std('i64')), out) - out = xr.DataArray(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T, + out = xr.DataArray(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T, coords=coords, dims=dims) assert_eq(c.points(df, 'x', 'y', ds.std('f32')), out) assert_eq(c.points(df, 'x', 'y', ds.std('f64')), out) @@ -141,9 +141,9 @@ def test_multiple_aggregates(): i32_count=ds.count('i32'))) f = lambda x: xr.DataArray(x, coords=coords, dims=dims) - assert_eq(agg.f64_std, f(np.nanstd(df.f64.reshape((2, 2, 5)), axis=2).T)) - assert_eq(agg.f64_mean, f(np.nanmean(df.f64.reshape((2, 2, 5)), axis=2).T)) - assert_eq(agg.i32_sum, f(df.i32.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T)) + assert_eq(agg.f64_std, f(np.nanstd(df.f64.values.reshape((2, 2, 5)), axis=2).T)) + assert_eq(agg.f64_mean, f(np.nanmean(df.f64.values.reshape((2, 2, 5)), axis=2).T)) + assert_eq(agg.i32_sum, f(df.i32.values.reshape((2, 2, 5)).sum(axis=2, dtype='f8').T)) assert_eq(agg.i32_count, f(np.array([[5, 5], [5, 5]], dtype='i4')))