From 7370343a72a360bb13edafa89495f2ca05c647b9 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 15:54:10 -0800 Subject: [PATCH 01/26] fixed flake8 warnings and errors --- python/cugraph/bfs/test_bfs.py | 46 ++++++++++------- python/cugraph/graph/test_graph.py | 51 +++++++++++-------- python/cugraph/grmat/test_grmat.py | 4 +- python/cugraph/jaccard/test_jaccard.py | 46 ++++++++--------- python/cugraph/jaccard/test_wjaccard.py | 42 ++++++++------- python/cugraph/louvain/test_louvain.py | 36 +++++++------ python/cugraph/pagerank/test_pagerank.py | 32 ++++++------ .../spectral_clustering/test_balanced_cut.py | 30 ++++++----- .../spectral_clustering/test_modularity.py | 30 ++++++----- python/cugraph/sssp/test_sssp.py | 2 +- python/setup.py | 41 +++++++++------ 11 files changed, 194 insertions(+), 166 deletions(-) diff --git a/python/cugraph/bfs/test_bfs.py b/python/cugraph/bfs/test_bfs.py index 5f6ed059add..b691f39c10d 100644 --- a/python/cugraph/bfs/test_bfs.py +++ b/python/cugraph/bfs/test_bfs.py @@ -11,12 +11,14 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import time -from scipy.io import mmread import pytest +import queue +import time import numpy as np +from scipy.io import mmread +import cudf +import cugraph + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') @@ -24,16 +26,15 @@ def ReadMtxFile(mmFile): def cugraph_Call(M, start_vertex): - # Device data M = M.tocsr() sources = cudf.Series(M.indptr) destinations = cudf.Series(M.indices) values = cudf.Series(M.data) - + G = cugraph.Graph() G.add_adj_list(sources, destinations, values) - + t1 = time.time() df = cugraph.bfs(G, start_vertex) t2 = time.time() - t1 @@ -44,40 +45,47 @@ def cugraph_Call(M, start_vertex): def base_Call(M, start_vertex): - intMax = 2147483647 + int_max = 2**31 - 1 + M = M.tocsr() + offsets = M.indptr indices = M.indices num_verts = len(offsets) - 1 dist = np.zeros(num_verts, dtype=np.int32) - + for i in range(num_verts): - dist[i] = intMax - import queue + dist[i] = int_max + q = queue.Queue() q.put(start_vertex) dist[start_vertex] = 0 while(not q.empty()): u = q.get() - for iCol in range(offsets[u],offsets[u + 1]): + for iCol in range(offsets[u], offsets[u + 1]): v = indices[iCol] - if (dist[v] == intMax): + if (dist[v] == int_max): dist[v] = dist[u] + 1 q.put(v) + return dist + datasets = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/polbooks.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + @pytest.mark.parametrize('graph_file', datasets) def test_bfs(graph_file): - M = ReadMtxFile(graph_file) + base_dist = base_Call(M, 0) - dist = cugraph_Call(M, 0) - - assert len(base_dist) == len(dist) - for i in range(len(dist)): - assert base_dist[i] == dist[i] + cugraph_dist = cugraph_Call(M, 0) + + # Calculating mismatch + + assert len(base_dist) == len(cugraph_dist) + for i in range(len(cugraph_dist)): + assert base_dist[i] == cugraph_dist[i] diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index 7b9097e76c2..53c5a02c113 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -17,34 +17,41 @@ import numpy as np from scipy.io import mmread + def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() + def compare_series(series_1, series_2): if (len(series_1) != len(series_2)): print("Series do not match in length") return 0 for i in range(len(series_1)): if(series_1[i] != series_2[i]): - print("Series[" + str(i) + "] does not match, " + str(series_1[i]) + ", " + str(series_2[i])) + print("Series[" + str(i) + "] does not match, " + str(series_1[i]) + + ", " + str(series_2[i])) return 0 return 1 -def compareOffsets(cu, np): + +def compare_offsets(cu, np): if not (len(cu) <= len(np)): print("Mismatched length: " + str(len(cu)) + " != " + str(len(np))) return False for i in range(len(cu)): if cu[i] != np[i]: - print("Series[" + str(i) + "]: " + str(cu[i]) + " != " + str(np[i])) + print("Series[" + str(i) + "]: " + str(cu[i]) + " != " + + str(np[i])) return False return True -datasets = ['/datasets/networks/karate.mtx', - '/datasets/networks/dolphins.mtx', + +datasets = ['/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] + @pytest.mark.parametrize('graph_file', datasets) def test_add_edge_list_to_adj_list(graph_file): @@ -53,30 +60,31 @@ def test_add_edge_list_to_adj_list(graph_file): destinations = cudf.Series(M.col) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets_exp = M.indptr indices_exp = M.indices # cugraph add_egde_list to_adj_list call G = cugraph.Graph() - G.add_edge_list(sources,destinations, None) + G.add_edge_list(sources, destinations, None) offsets_cu, indices_cu = G.view_adj_list() - assert compareOffsets(offsets_cu, offsets_exp) + assert compare_offsets(offsets_cu, offsets_exp) assert compare_series(indices_cu, indices_exp) + @pytest.mark.parametrize('graph_file', datasets) def test_add_adj_list_to_edge_list(graph_file): M = ReadMtxFile(graph_file) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -92,9 +100,10 @@ def test_add_adj_list_to_edge_list(graph_file): destinations_cu = np.array(destinations) assert compare_series(sources_cu, sources_exp) assert compare_series(destinations_cu, destinations_exp) - + + @pytest.mark.parametrize('graph_file', datasets) -def test_transpose_from_adj_list(graph_file): +def test_transpose_from_adj_list(graph_file): M = ReadMtxFile(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) @@ -105,8 +114,9 @@ def test_transpose_from_adj_list(graph_file): Mt = M.transpose().tocsr() toff, tind = G.view_transpose_adj_list() assert compare_series(Mt.indices, tind) - assert compareOffsets(toff, Mt.indptr) - + assert compare_offsets(toff, Mt.indptr) + + @pytest.mark.parametrize('graph_file', datasets) def test_view_edge_list_from_adj_list(graph_file): M = ReadMtxFile(graph_file) @@ -121,7 +131,8 @@ def test_view_edge_list_from_adj_list(graph_file): dst1 = M.col assert compare_series(src1, src2) assert compare_series(dst1, dst2) - + + @pytest.mark.parametrize('graph_file', datasets) def test_delete_edge_list_delete_adj_list(graph_file): M = ReadMtxFile(graph_file) @@ -129,11 +140,11 @@ def test_delete_edge_list_delete_adj_list(graph_file): destinations = cudf.Series(M.col) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -150,5 +161,3 @@ def test_delete_edge_list_delete_adj_list(graph_file): with pytest.raises(cudf.bindings.GDFError.GDFError) as excinfo: G.view_edge_list() assert excinfo.value.errcode.decode() == 'GDF_INVALID_API_CALL' - - diff --git a/python/cugraph/grmat/test_grmat.py b/python/cugraph/grmat/test_grmat.py index 3d3933b6191..0db1ef816d5 100644 --- a/python/cugraph/grmat/test_grmat.py +++ b/python/cugraph/grmat/test_grmat.py @@ -12,5 +12,7 @@ # limitations under the License. import cugraph -vertices, edges, sources, destinations = cugraph.grmat_gen('grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') + +vertices, edges, sources, destinations = cugraph.grmat_gen( + 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index f8d0a24d37a..bfae37ef7da 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -13,44 +13,42 @@ import cugraph import cudf -import numpy as np -import sys import time from scipy.io import mmread import networkx as nx -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) - + G = cugraph.Graph() - G.add_adj_list(row_offsets,col_indices,None) + G.add_adj_list(row_offsets, col_indices, None) # cugraph Jaccard Call t1 = time.time() df = cugraph.nvJaccard(G) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return df['jaccard_coeff'].to_array() + def networkx_Call(M): M = M.tocsr() @@ -59,8 +57,9 @@ def networkx_Call(M): destinations = M.col edges = [] for i in range(len(sources)): - edges.append((sources[i],destinations[i])) - # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly + edges.append((sources[i], destinations[i])) + # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this + # explicitly print('Format conversion ... ') # Directed NetworkX graph @@ -71,21 +70,21 @@ def networkx_Call(M): print('Solving... ') t1 = time.time() preds = nx.jaccard_coefficient(Gnx, edges) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) coeff = [] - for u,v,p in preds: + for u, v, p in preds: coeff.append(p) return coeff - -datasets = ['/datasets/networks/dolphins.mtx', - '/datasets/networks/karate.mtx', + +datasets = ['/datasets/networks/dolphins.mtx', + '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_jaccard(graph_file): M = ReadMtxFile(graph_file) @@ -96,10 +95,7 @@ def test_jaccard(graph_file): tol = 1.0e-06 assert len(cu_coeff) == len(nx_coeff) for i in range(len(cu_coeff)): - if(abs(cu_coeff[i] -nx_coeff[i])>tol*1.1): - err+=1 - print("Mismatches: %d" %err) + if(abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1): + err += 1 + print("Mismatches: %d" % err) assert err == 0 - - - diff --git a/python/cugraph/jaccard/test_wjaccard.py b/python/cugraph/jaccard/test_wjaccard.py index bda9000863d..ac21a85e36e 100644 --- a/python/cugraph/jaccard/test_wjaccard.py +++ b/python/cugraph/jaccard/test_wjaccard.py @@ -14,58 +14,56 @@ import cugraph import cudf import numpy as np -import sys import time from scipy.io import mmread import networkx as nx -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Callw(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) - #values = cudf.Series(np.ones(len(col_indices), dtype = np.float32), nan_as_null = False) - weights_arr = cudf.Series(np.ones(len(row_offsets), dtype = np.float32), nan_as_null = False) - + # values = cudf.Series(np.ones(len(col_indices), dtype=np.float32), + # nan_as_null=False) + weights_arr = cudf.Series(np.ones(len(row_offsets), dtype=np.float32), + nan_as_null=False) + G = cugraph.Graph() - G.add_adj_list(row_offsets,col_indices,None) + G.add_adj_list(row_offsets, col_indices, None) # cugraph Jaccard Call t1 = time.time() df = cugraph.nvJaccard_w(G, weights_arr) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return df['jaccard_coeff'] - -datasets = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] +datasets = ['/datasets/networks/dolphins.mtx', + '/datasets/networks/karate.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_wjaccard(graph_file): M = ReadMtxFile(graph_file) - cu_coeff = cuGraph_Callw(M) - - # no NetworkX equivalent to compare against... - - - - + # suppress F841 (local variable is assigned but never used) in flake8 + # no networkX equivalent to compare cu_coeff against... + cu_coeff = cuGraph_Callw(M) # noqa: F841 + # this test is incomplete... diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index 46978dcdeb1..22da1b6b230 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -13,82 +13,80 @@ import cugraph import cudf -import numpy as np -import sys import time from scipy.io import mmread import networkx as nx import community -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) G = cugraph.Graph() - G.add_adj_list(row_offsets, col_indices, values) + G.add_adj_list(row_offsets, col_indices, values) # cugraph Louvain Call t1 = time.time() parts, mod = cugraph.nvLouvain(G) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return parts, mod + def networkx_Call(M): M = M.tocsr() # Directed NetworkX graph Gnx = nx.Graph(M) - #z = {k: 1.0/M.shape[0] for k in range(M.shape[0])} + # z = {k: 1.0/M.shape[0] for k in range(M.shape[0])} # Networkx Jaccard Call print('Solving... ') t1 = time.time() parts = community.best_partition(Gnx) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return parts - -datasets = ['/datasets/networks/karate.mtx', - '/datasets/networks/dolphins.mtx', + +datasets = ['/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_louvain(graph_file): M = ReadMtxFile(graph_file) cu_parts, cu_mod = cuGraph_Call(M) nx_parts = networkx_Call(M) - + # Calculating modularity scores for comparison Gnx = nx.Graph(M) - cu_map = {0:0} + cu_map = {0: 0} for i in range(len(cu_parts)): cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i] cu_mod_nx = community.modularity(cu_map, Gnx) nx_mod = community.modularity(nx_parts, Gnx) assert len(cu_parts) == len(nx_parts) assert cu_mod > (.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < .0001 \ No newline at end of file + assert abs(cu_mod - cu_mod_nx) < .0001 diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index e42a3e0db67..9bca7d430a1 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -11,12 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf +import pytest import time -from scipy.io import mmread import networkx as nx -import pytest +from scipy.io import mmread +import cudf +import cugraph print('Networkx version : {} '.format(nx.__version__)) @@ -27,12 +27,11 @@ def ReadMtxFile(mmFile): def cugraph_Call(M, max_iter, tol, alpha): - # Device data sources = cudf.Series(M.row) destinations = cudf.Series(M.col) # values = cudf.Series(np.ones(len(sources), dtype = np.float64)) - + # cugraph Pagerank Call G = cugraph.Graph() G.add_edge_list(sources, destinations, None) @@ -51,7 +50,6 @@ def cugraph_Call(M, max_iter, tol, alpha): def networkx_Call(M, max_iter, tol, alpha): - nnz_per_row = {r: 0 for r in range(M.get_shape()[0])} for nnz in range(M.getnnz()): nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] @@ -97,25 +95,27 @@ def networkx_Call(M, max_iter, tol, alpha): '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -Max_Iterations = [500] +max_iterations = [500] tolerance = [1.0e-06] alpha = [0.85] @pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('max_iter', Max_Iterations) +@pytest.mark.parametrize('max_iter', max_iterations) @pytest.mark.parametrize('tol', tolerance) @pytest.mark.parametrize('alpha', alpha) def test_pagerank(graph_file, max_iter, tol, alpha): - M = ReadMtxFile(graph_file) - sorted_pr = cugraph_Call(M, max_iter, tol, alpha) - items = networkx_Call(M, max_iter, tol, alpha) + + networkx_pr = networkx_Call(M, max_iter, tol, alpha) + cugraph_pr = cugraph_Call(M, max_iter, tol, alpha) + # Calculating mismatch + err = 0 - # assert len(sorted_pr) == len(items) - for i in range(len(sorted_pr)): - if(abs(sorted_pr[i][1]-items[i][1]) > tol*1.1): + assert len(cugraph_pr) == len(networkx_pr) + for i in range(len(cugraph_pr)): + if(abs(cugraph_pr[i][1]-networkx_pr[i][1]) > tol*1.1): err = err + 1 print(err) - assert err < (0.01*len(sorted_pr)) + assert err < (0.01*len(cugraph_pr)) diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 81a112aaadd..3634002fde5 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -13,37 +13,40 @@ import cugraph import cudf -import numpy as np -import sys -import time from scipy.io import mmread -import community -import os import pytest import random + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(G, partitions): - df = cugraph.spectralBalancedCutClustering(G, partitions, num_eigen_vects=partitions) + df = cugraph.spectralBalancedCutClustering(G, partitions, + num_eigen_vects=partitions) score = cugraph.analyzeClustering_edge_cut(G, partitions, df['cluster']) return score + def random_Call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): - assignment.append(random.randint(0,partitions-1)) + assignment.append(random.randint(0, partitions-1)) assignment_cu = cudf.Series(assignment) score = cugraph.analyzeClustering_edge_cut(G, partitions, assignment_cu) return score - -datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + +datasets = [ + '/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] partitions = [2, 4, 8] + + @pytest.mark.parametrize('graph_file', datasets) @pytest.mark.parametrize('partitions', partitions) def test_modularityClustering(graph_file, partitions): @@ -54,10 +57,11 @@ def test_modularityClustering(graph_file, partitions): values = cudf.Series(M.data) G = cugraph.Graph() G.add_adj_list(row_offsets, col_indices, values) - + # Get the modularity score for partitioning versus random assignment cu_score = cuGraph_Call(G, partitions) rand_score = random_Call(G, partitions) - - # Assert that the partitioning has better modularity than the random assignment + + # Assert that the partitioning has better modularity than the random + # assignment assert cu_score < rand_score diff --git a/python/cugraph/spectral_clustering/test_modularity.py b/python/cugraph/spectral_clustering/test_modularity.py index 0238be2a194..f39ff838af5 100644 --- a/python/cugraph/spectral_clustering/test_modularity.py +++ b/python/cugraph/spectral_clustering/test_modularity.py @@ -13,37 +13,40 @@ import cugraph import cudf -import numpy as np -import sys -import time from scipy.io import mmread -import community -import os import pytest import random + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(G, partitions): - df = cugraph.spectralModularityMaximizationClustering(G, partitions, num_eigen_vects=(partitions - 1)) + df = cugraph.spectralModularityMaximizationClustering( + G, partitions, num_eigen_vects=(partitions - 1)) score = cugraph.analyzeClustering_modularity(G, partitions, df['cluster']) return score + def random_Call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): - assignment.append(random.randint(0,partitions-1)) + assignment.append(random.randint(0, partitions-1)) assignment_cu = cudf.Series(assignment) score = cugraph.analyzeClustering_modularity(G, partitions, assignment_cu) return score - -datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + +datasets = [ + '/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] partitions = [2, 4, 8] + + @pytest.mark.parametrize('graph_file', datasets) @pytest.mark.parametrize('partitions', partitions) def test_modularityClustering(graph_file, partitions): @@ -54,10 +57,11 @@ def test_modularityClustering(graph_file, partitions): values = cudf.Series(M.data) G = cugraph.Graph() G.add_adj_list(row_offsets, col_indices, values) - + # Get the modularity score for partitioning versus random assignment cu_score = cuGraph_Call(G, partitions) rand_score = random_Call(G, partitions) - - # Assert that the partitioning has better modularity than the random assignment + + # Assert that the partitioning has better modularity than the random + # assignment assert cu_score > rand_score diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index 6c3cb279804..69b560339aa 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -71,7 +71,7 @@ def networkx_Call(M, source): print('NX Solving... ') t1 = time.time() - path=nx.single_source_shortest_path(Gnx, source) + path = nx.single_source_shortest_path(Gnx, source) t2 = time.time() - t1 diff --git a/python/setup.py b/python/setup.py index d56904c1509..886d4194dfd 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,6 +1,6 @@ # Copyright (c) 2018, NVIDIA CORPORATION. -from setuptools import setup, find_packages +from setuptools import setup from setuptools.extension import Extension from Cython.Build import cythonize import numpy @@ -11,20 +11,20 @@ import os import sys -install_requires = [ - 'numba', - 'cython' -] +install_requires = ['numba', 'cython'] + def find_in_path(name, path): "Find a file in a search path" - #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + # adapted fom + # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path for dir in path.split(os.pathsep): binpath = pjoin(dir, name) if os.path.exists(binpath): return os.path.abspath(binpath) return None + def locate_cuda(): """Locate the CUDA environment on the system Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' @@ -41,30 +41,39 @@ def locate_cuda(): # otherwise, search the PATH for NVCC nvcc = find_in_path('nvcc', os.environ['PATH']) if nvcc is None: - raise EnvironmentError('The nvcc binary could not be ' - 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + raise EnvironmentError( + 'The nvcc binary could not be located in your $PATH. ' + 'Either add it to your path, or set $CUDAHOME') home = os.path.dirname(os.path.dirname(nvcc)) - cudaconfig = {'home':home, 'nvcc':nvcc, + cudaconfig = {'home': home, 'nvcc': nvcc, 'include': pjoin(home, 'include'), 'lib64': pjoin(home, 'lib64')} for k, v in iter(cudaconfig.items()): if not os.path.exists(v): - raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + raise EnvironmentError( + 'The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig + def locate_nvgraph(): if 'CONDA_PREFIX' in os.environ: - nvgraph_found = find_in_path('lib/libnvgraph_st.so', os.environ['CONDA_PREFIX']) + nvgraph_found = find_in_path('lib/libnvgraph_st.so', + os.environ['CONDA_PREFIX']) if nvgraph_found is None: - nvgraph_found = find_in_path('libnvgraph_st.so', os.environ['LD_LIBRARY_PATH']) + nvgraph_found = find_in_path('libnvgraph_st.so', + os.environ['LD_LIBRARY_PATH']) if nvgraph_found is None: - raise EnvironmentError('The nvgraph library could not be located') - nvgraph_config = {'include':pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), 'include', 'nvgraph'), - 'lib':os.path.dirname(nvgraph_found)} + raise EnvironmentError('The nvgraph library could not be located') + nvgraph_config = { + 'include': pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), + 'include', 'nvgraph'), + 'lib': os.path.dirname(nvgraph_found)} + return nvgraph_config - + + CUDA = locate_cuda() NVGRAPH = locate_nvgraph() From 446873ae29d871262ce56f17805851bc421f74fb Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 22:52:18 -0800 Subject: [PATCH 02/26] fixed pylint errors and warnings (except for ones related to missing docstring, short variable names, superfluous parens, and consider using enumerates). --- python/cugraph/bfs/test_bfs.py | 28 ++++----- python/cugraph/graph/test_graph.py | 49 ++++++++-------- python/cugraph/jaccard/test_jaccard.py | 29 +++++----- python/cugraph/jaccard/test_wjaccard.py | 27 +++++---- python/cugraph/louvain/test_louvain.py | 31 +++++----- python/cugraph/pagerank/test_pagerank.py | 37 ++++++------ .../spectral_clustering/test_balanced_cut.py | 36 ++++++------ .../spectral_clustering/test_modularity.py | 38 +++++++------ python/cugraph/sssp/test_sssp.py | 32 ++++++----- python/setup.py | 57 ++++++++++--------- 10 files changed, 193 insertions(+), 171 deletions(-) diff --git a/python/cugraph/bfs/test_bfs.py b/python/cugraph/bfs/test_bfs.py index b691f39c10d..4e5fc5c3bf7 100644 --- a/python/cugraph/bfs/test_bfs.py +++ b/python/cugraph/bfs/test_bfs.py @@ -11,21 +11,23 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import queue import time + import numpy as np +import pytest from scipy.io import mmread + import cudf import cugraph -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cugraph_Call(M, start_vertex): +def cugraph_call(M, start_vertex): # Device data M = M.tocsr() sources = cudf.Series(M.indptr) @@ -44,7 +46,7 @@ def cugraph_Call(M, start_vertex): return df['distance'].to_array() -def base_Call(M, start_vertex): +def base_call(M, start_vertex): int_max = 2**31 - 1 M = M.tocsr() @@ -62,8 +64,8 @@ def base_Call(M, start_vertex): dist[start_vertex] = 0 while(not q.empty()): u = q.get() - for iCol in range(offsets[u], offsets[u + 1]): - v = indices[iCol] + for i_col in range(offsets[u], offsets[u + 1]): + v = indices[i_col] if (dist[v] == int_max): dist[v] = dist[u] + 1 q.put(v) @@ -71,18 +73,18 @@ def base_Call(M, start_vertex): return dist -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/polbooks.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_bfs(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) - base_dist = base_Call(M, 0) - cugraph_dist = cugraph_Call(M, 0) + base_dist = base_call(M, 0) + cugraph_dist = cugraph_call(M, 0) # Calculating mismatch diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index 53c5a02c113..b884fba3298 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -11,16 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import pytest import numpy as np +import pytest from scipy.io import mmread +import cugraph +import cudf + -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() def compare_series(series_1, series_2): @@ -35,27 +36,27 @@ def compare_series(series_1, series_2): return 1 -def compare_offsets(cu, np): - if not (len(cu) <= len(np)): - print("Mismatched length: " + str(len(cu)) + " != " + str(len(np))) +def compare_offsets(offset0, offset1): + if not (len(offset0) <= len(offset1)): + print("Mismatched length: " + str(len(offset0)) + " != " + str(len(offset1))) return False - for i in range(len(cu)): - if cu[i] != np[i]: - print("Series[" + str(i) + "]: " + str(cu[i]) + " != " - + str(np[i])) + for i in range(len(offset0)): + if offset0[i] != offset1[i]: + print("Series[" + str(i) + "]: " + str(offset0[i]) + " != " + + str(offset1[i])) return False return True -datasets = ['/datasets/networks/karate.mtx', +DATASETS = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_add_edge_list_to_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) sources = cudf.Series(M.row) destinations = cudf.Series(M.col) @@ -76,9 +77,9 @@ def test_add_edge_list_to_adj_list(graph_file): assert compare_series(indices_cu, indices_exp) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_add_adj_list_to_edge_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -102,9 +103,9 @@ def test_add_adj_list_to_edge_list(graph_file): assert compare_series(destinations_cu, destinations_exp) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_transpose_from_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -117,9 +118,9 @@ def test_transpose_from_adj_list(graph_file): assert compare_offsets(toff, Mt.indptr) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_view_edge_list_from_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -133,9 +134,9 @@ def test_view_edge_list_from_adj_list(graph_file): assert compare_series(dst1, dst2) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) sources = cudf.Series(M.row) destinations = cudf.Series(M.col) diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index bfae37ef7da..919c59e439b 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -11,22 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread + import networkx as nx import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -49,7 +52,7 @@ def cuGraph_Call(M): return df['jaccard_coeff'].to_array() -def networkx_Call(M): +def networkx_call(M): M = M.tocsr() M = M.tocoo() @@ -79,17 +82,17 @@ def networkx_Call(M): return coeff -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_jaccard(graph_file): - M = ReadMtxFile(graph_file) - cu_coeff = cuGraph_Call(M) - nx_coeff = networkx_Call(M) + M = read_mtx_file(graph_file) + cu_coeff = cugraph_call(M) + nx_coeff = networkx_call(M) # Calculating mismatch err = 0 tol = 1.0e-06 diff --git a/python/cugraph/jaccard/test_wjaccard.py b/python/cugraph/jaccard/test_wjaccard.py index ac21a85e36e..9a4315893d5 100644 --- a/python/cugraph/jaccard/test_wjaccard.py +++ b/python/cugraph/jaccard/test_wjaccard.py @@ -11,23 +11,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import numpy as np import time -from scipy.io import mmread + import networkx as nx +import numpy as np import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Callw(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -54,16 +57,16 @@ def cuGraph_Callw(M): return df['jaccard_coeff'] -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_wjaccard(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) # suppress F841 (local variable is assigned but never used) in flake8 # no networkX equivalent to compare cu_coeff against... - cu_coeff = cuGraph_Callw(M) # noqa: F841 + cu_coeff = cugraph_call(M) # noqa: F841 # this test is incomplete... diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index 22da1b6b230..dc20714437c 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -11,23 +11,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread -import networkx as nx + import community +import networkx as nx import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -51,7 +54,7 @@ def cuGraph_Call(M): return parts, mod -def networkx_Call(M): +def networkx_call(M): M = M.tocsr() # Directed NetworkX graph @@ -69,16 +72,16 @@ def networkx_Call(M): return parts -datasets = ['/datasets/networks/karate.mtx', +DATASETS = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_louvain(graph_file): - M = ReadMtxFile(graph_file) - cu_parts, cu_mod = cuGraph_Call(M) - nx_parts = networkx_Call(M) + M = read_mtx_file(graph_file) + cu_parts, cu_mod = cugraph_call(M) + nx_parts = networkx_call(M) # Calculating modularity scores for comparison Gnx = nx.Graph(M) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index 9bca7d430a1..642c6f9f5f3 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -11,22 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import time + import networkx as nx +import pytest from scipy.io import mmread + import cudf import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cugraph_Call(M, max_iter, tol, alpha): +def cugraph_call(M, max_iter, tol, alpha): # Device data sources = cudf.Series(M.row) destinations = cudf.Series(M.col) @@ -49,7 +52,7 @@ def cugraph_Call(M, max_iter, tol, alpha): return sorted(sorted_pr, key=lambda x: x[1], reverse=True) -def networkx_Call(M, max_iter, tol, alpha): +def networkx_call(M, max_iter, tol, alpha): nnz_per_row = {r: 0 for r in range(M.get_shape()[0])} for nnz in range(M.getnnz()): nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] @@ -91,24 +94,24 @@ def networkx_Call(M, max_iter, tol, alpha): return sorted(pr.items(), key=lambda x: x[1], reverse=True) -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -max_iterations = [500] -tolerance = [1.0e-06] -alpha = [0.85] +MAX_ITERATIONS = [500] +TOLERANCE = [1.0e-06] +ALPHA = [0.85] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('max_iter', max_iterations) -@pytest.mark.parametrize('tol', tolerance) -@pytest.mark.parametrize('alpha', alpha) +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('max_iter', MAX_ITERATIONS) +@pytest.mark.parametrize('tol', TOLERANCE) +@pytest.mark.parametrize('alpha', ALPHA) def test_pagerank(graph_file, max_iter, tol, alpha): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) - networkx_pr = networkx_Call(M, max_iter, tol, alpha) - cugraph_pr = cugraph_Call(M, max_iter, tol, alpha) + networkx_pr = networkx_call(M, max_iter, tol, alpha) + cugraph_pr = cugraph_call(M, max_iter, tol, alpha) # Calculating mismatch diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 3634002fde5..8a719eefcc6 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -11,26 +11,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -from scipy.io import mmread -import pytest import random +import pytest +from scipy.io import mmread + +import cudf +import cugraph + -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(G, partitions): +def cugraph_call(G, partitions): df = cugraph.spectralBalancedCutClustering(G, partitions, num_eigen_vects=partitions) score = cugraph.analyzeClustering_edge_cut(G, partitions, df['cluster']) return score -def random_Call(G, partitions): +def random_call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): @@ -40,18 +42,18 @@ def random_Call(G, partitions): return score -datasets = [ +DATASETS = [ '/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -partitions = [2, 4, 8] +PARTITIONS = [2, 4, 8] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('partitions', partitions) -def test_modularityClustering(graph_file, partitions): +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('partitions', PARTITIONS) +def test_modularity_clustering(graph_file, partitions): # Read in the graph and get a cugraph object - M = ReadMtxFile(graph_file).tocsr() + M = read_mtx_file(graph_file).tocsr() row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) @@ -59,8 +61,8 @@ def test_modularityClustering(graph_file, partitions): G.add_adj_list(row_offsets, col_indices, values) # Get the modularity score for partitioning versus random assignment - cu_score = cuGraph_Call(G, partitions) - rand_score = random_Call(G, partitions) + cu_score = cugraph_call(G, partitions) + rand_score = random_call(G, partitions) # Assert that the partitioning has better modularity than the random # assignment diff --git a/python/cugraph/spectral_clustering/test_modularity.py b/python/cugraph/spectral_clustering/test_modularity.py index f39ff838af5..5421af7640e 100644 --- a/python/cugraph/spectral_clustering/test_modularity.py +++ b/python/cugraph/spectral_clustering/test_modularity.py @@ -11,26 +11,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -from scipy.io import mmread -import pytest import random +import pytest +from scipy.io import mmread + +import cudf +import cugraph + -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(G, partitions): +def cugraph_call(G, partitions): df = cugraph.spectralModularityMaximizationClustering( - G, partitions, num_eigen_vects=(partitions - 1)) + G, partitions, num_eigen_vects=(partitions - 1)) score = cugraph.analyzeClustering_modularity(G, partitions, df['cluster']) return score -def random_Call(G, partitions): +def random_call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): @@ -40,18 +42,18 @@ def random_Call(G, partitions): return score -datasets = [ +DATASETS = [ '/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -partitions = [2, 4, 8] +PARTITIONS = [2, 4, 8] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('partitions', partitions) -def test_modularityClustering(graph_file, partitions): +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('partitions', PARTITIONS) +def test_modularity_clustering(graph_file, partitions): # Read in the graph and get a cugraph object - M = ReadMtxFile(graph_file).tocsr() + M = read_mtx_file(graph_file).tocsr() row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) @@ -59,8 +61,8 @@ def test_modularityClustering(graph_file, partitions): G.add_adj_list(row_offsets, col_indices, values) # Get the modularity score for partitioning versus random assignment - cu_score = cuGraph_Call(G, partitions) - rand_score = random_Call(G, partitions) + cu_score = cugraph_call(G, partitions) + rand_score = random_call(G, partitions) # Assert that the partitioning has better modularity than the random # assignment diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index 69b560339aa..b139c76d311 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -11,23 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread + import networkx as nx import numpy as np import pytest +from scipy.io import mmread + +import cudf +import cugraph print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cugraph_Call(M, source): +def cugraph_call(M, source): # Device data sources = cudf.Series(M.row) @@ -56,7 +58,7 @@ def cugraph_Call(M, source): return distances -def networkx_Call(M, source): +def networkx_call(M, source): print('Format conversion ... ') M = M.tocsr() @@ -80,20 +82,20 @@ def networkx_Call(M, source): return path -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -source = [1] +SOURCES = [1] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('source', source) +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('source', SOURCES) def test_sssp(graph_file, source): - M = ReadMtxFile(graph_file) - cu_paths = cugraph_Call(M, source) - nx_paths = networkx_Call(M, source) + M = read_mtx_file(graph_file) + cu_paths = cugraph_call(M, source) + nx_paths = networkx_call(M, source) # Calculating mismatch err = 0 diff --git a/python/setup.py b/python/setup.py index 886d4194dfd..0df50e480c1 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,25 +1,26 @@ # Copyright (c) 2018, NVIDIA CORPORATION. +from distutils.sysconfig import get_python_lib +import os +from os.path import join as pjoin +import sys + from setuptools import setup from setuptools.extension import Extension from Cython.Build import cythonize import numpy - import versioneer -from distutils.sysconfig import get_python_lib -from os.path import join as pjoin -import os -import sys -install_requires = ['numba', 'cython'] + +INSTALL_REQUIRES = ['numba', 'cython'] def find_in_path(name, path): "Find a file in a search path" # adapted fom # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path - for dir in path.split(os.pathsep): - binpath = pjoin(dir, name) + for directory in path.split(os.pathsep): + binpath = pjoin(directory, name) if os.path.exists(binpath): return os.path.abspath(binpath) return None @@ -42,8 +43,8 @@ def locate_cuda(): nvcc = find_in_path('nvcc', os.environ['PATH']) if nvcc is None: raise EnvironmentError( - 'The nvcc binary could not be located in your $PATH. ' - 'Either add it to your path, or set $CUDAHOME') + 'The nvcc binary could not be located in your $PATH. ' + 'Either add it to your path, or set $CUDAHOME') home = os.path.dirname(os.path.dirname(nvcc)) cudaconfig = {'home': home, 'nvcc': nvcc, @@ -52,7 +53,7 @@ def locate_cuda(): for k, v in iter(cudaconfig.items()): if not os.path.exists(v): raise EnvironmentError( - 'The CUDA %s path could not be located in %s' % (k, v)) + 'The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig @@ -78,18 +79,18 @@ def locate_nvgraph(): NVGRAPH = locate_nvgraph() try: - numpy_include = numpy.get_include() + NUMPY_INCLUDE = numpy.get_include() except AttributeError: - numpy_include = numpy.get_numpy_include() + NUMPY_INCLUDE = numpy.get_numpy_include() -cudf_include = os.path.normpath(sys.prefix) + '/include' -cython_files = ['cugraph/*.pyx'] +CUDF_INCLUDE = os.path.normpath(sys.prefix) + '/include' +CYTHON_FILES = ['cugraph/*.pyx'] -extensions = [ +EXTENSIONS = [ Extension("cugraph", - sources=cython_files, - include_dirs=[numpy_include, - cudf_include, + sources=CYTHON_FILES, + include_dirs=[NUMPY_INCLUDE, + CUDF_INCLUDE, NVGRAPH['include'], CUDA['include'], '../cpp/src', @@ -107,19 +108,19 @@ def locate_nvgraph(): description="cuGraph - GPU Graph Analytics", version=versioneer.get_version(), classifiers=[ - # "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - # "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7" + # "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + # "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7" ], # Include the separately-compiled shared library author="NVIDIA Corporation", setup_requires=['cython'], - ext_modules=cythonize(extensions), - install_requires=install_requires, + ext_modules=cythonize(EXTENSIONS), + install_requires=INSTALL_REQUIRES, license="Apache", cmdclass=versioneer.get_cmdclass(), zip_safe=False - ) + ) From 97421273058c2edbebc2206d35e76d5b78478211 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 23:21:15 -0800 Subject: [PATCH 03/26] temporarily suppress deprecation warning (python 3.7) in importing networkx --- python/cugraph/jaccard/test_jaccard.py | 11 ++++++++++- python/cugraph/louvain/test_louvain.py | 13 +++++++++++-- python/cugraph/pagerank/test_pagerank.py | 11 ++++++++++- python/cugraph/sssp/test_sssp.py | 12 +++++++++++- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index 919c59e439b..f06edc6d9f1 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -13,13 +13,22 @@ import time -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index dc20714437c..70329af847c 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -13,14 +13,23 @@ import time -import community -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, these import community and import networkx need to be +# relocated in the third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import community + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index 642c6f9f5f3..989a6626085 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -13,13 +13,22 @@ import time -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index b139c76d311..cc638597812 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -13,7 +13,6 @@ import time -import networkx as nx import numpy as np import pytest from scipy.io import mmread @@ -21,6 +20,17 @@ import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + + print('Networkx version : {} '.format(nx.__version__)) From cce4912e0500e9ab7ee79a30bfb0785fe93459f5 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 1 Mar 2019 11:51:06 -0800 Subject: [PATCH 04/26] fixed a segmentation fault error when invoking view_edge_list on an empty graph --- python/cugraph/graph/c_graph.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index ff5d97b396d..895010d56ae 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -122,7 +122,9 @@ class Graph: """ cdef uintptr_t graph = self.graph_ptr cdef gdf_graph * g = < gdf_graph *> graph - gdf_add_edge_list(g) + err = gdf_add_edge_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + col_size = g.edgeList.src_indices.size cdef uintptr_t src_col_data = < uintptr_t > g.edgeList.src_indices.data @@ -171,10 +173,10 @@ class Graph: Compute the adjacency list from edge list and return offsets and indices as cudf Series. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_add_adj_list(< gdf_graph *> graph) - cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef gdf_graph * g = < gdf_graph *> graph + err = gdf_add_adj_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + col_size_off = g.adjList.offsets.size col_size_ind = g.adjList.indices.size From b1f4e02bdcddfff3de20aee749cedf490187a19a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 1 Mar 2019 18:12:37 -0800 Subject: [PATCH 05/26] fixed a 'terminate called after throwing an instance of 'thrust::system::system_error' what(): radidx_sort: failed on 2nd step: invalid device pointer' error on test_graph.py --- cpp/include/rmm_utils.h | 2 +- cpp/include/types.h | 2 +- cpp/src/cugraph.cu | 50 +++++++++++++++--------------- python/cugraph/graph/c_graph.pxd | 22 ++++++------- python/cugraph/graph/c_graph.pyx | 27 +++++++++------- python/cugraph/graph/test_graph.py | 2 +- 6 files changed, 54 insertions(+), 51 deletions(-) diff --git a/cpp/include/rmm_utils.h b/cpp/include/rmm_utils.h index b940392cf94..12b1b988fb6 100755 --- a/cpp/include/rmm_utils.h +++ b/cpp/include/rmm_utils.h @@ -45,7 +45,7 @@ class rmm_allocator : public thrust::device_malloc_allocator ~rmm_allocator() {} private: - cudaStream_t stream; + cudaStream_t stream; }; using rmm_temp_allocator = rmm_allocator; // Use this alias for thrust::cuda::par(allocator).on(stream) diff --git a/cpp/include/types.h b/cpp/include/types.h index c9e3de8ad8d..509e035b4b6 100644 --- a/cpp/include/types.h +++ b/cpp/include/types.h @@ -94,7 +94,7 @@ struct gdf_graph{ gdf_dynamic *dynAdjList; //dynamic gdf_graph_properties *prop; gdf_graph() : edgeList(nullptr), adjList(nullptr), transposedAdjList(nullptr), dynAdjList(nullptr), prop(nullptr) {} - ~gdf_graph() { + ~gdf_graph() { if (edgeList) delete edgeList; if (adjList) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 47f9ddf527f..2cb144db135 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -32,11 +32,11 @@ void gdf_col_delete(gdf_column* col) { delete col; col->data = nullptr; col = nullptr; - } + } } void gdf_col_release(gdf_column* col) { - delete col; + delete col; } void cpy_column_view(const gdf_column *in, gdf_column *out) { @@ -284,8 +284,8 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph, gdf_error gdf_add_adj_list(gdf_graph *graph) { - if (graph->adjList != nullptr) - return GDF_SUCCESS; + if (graph->adjList != nullptr) + return GDF_SUCCESS; GDF_REQUIRE( graph->edgeList != nullptr , GDF_INVALID_API_CALL); GDF_REQUIRE( graph->adjList == nullptr , GDF_INVALID_API_CALL); @@ -304,8 +304,8 @@ gdf_error gdf_add_adj_list(gdf_graph *graph) gdf_error gdf_add_transpose(gdf_graph *graph) { - if (graph->edgeList == nullptr) - gdf_add_edge_list(graph); + if (graph->edgeList == nullptr) + gdf_add_edge_list(graph); if (graph->edgeList->edge_data != nullptr) { switch (graph->edgeList->edge_data->dtype) { case GDF_FLOAT32: return gdf_add_transpose_impl(graph); @@ -349,23 +349,23 @@ gdf_error gdf_pagerank(gdf_graph *graph, gdf_column *pagerank, float alpha, floa } gdf_error gdf_bfs(gdf_graph *graph, gdf_column *distances, gdf_column *predecessors, int start_node, bool directed) { - GDF_REQUIRE(graph->adjList != nullptr, GDF_VALIDITY_UNSUPPORTED); - GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - - int n = graph->adjList->offsets->size - 1; - int e = graph->adjList->indices->size; - int* offsets_ptr = (int*)graph->adjList->offsets->data; - int* indices_ptr = (int*)graph->adjList->indices->data; - int* distances_ptr = (int*)distances->data; - int* predecessors_ptr = (int*)predecessors->data; - int alpha = 15; - int beta = 18; - - cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta); - bfs.configure(distances_ptr, predecessors_ptr, nullptr); - bfs.traverse(start_node); - return GDF_SUCCESS; + GDF_REQUIRE(graph->adjList != nullptr, GDF_VALIDITY_UNSUPPORTED); + GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + + int n = graph->adjList->offsets->size - 1; + int e = graph->adjList->indices->size; + int* offsets_ptr = (int*)graph->adjList->offsets->data; + int* indices_ptr = (int*)graph->adjList->indices->data; + int* distances_ptr = (int*)distances->data; + int* predecessors_ptr = (int*)predecessors->data; + int alpha = 15; + int beta = 18; + + cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta); + bfs.configure(distances_ptr, predecessors_ptr, nullptr); + bfs.traverse(start_node); + return GDF_SUCCESS; } diff --git a/python/cugraph/graph/c_graph.pxd b/python/cugraph/graph/c_graph.pxd index 5219813498d..025761b827b 100755 --- a/python/cugraph/graph/c_graph.pxd +++ b/python/cugraph/graph/c_graph.pxd @@ -2,7 +2,7 @@ from libcpp cimport bool cdef extern from "cudf.h": - ctypedef enum gdf_error: + ctypedef enum gdf_error: pass ctypedef enum gdf_dtype: @@ -13,9 +13,9 @@ cdef extern from "cudf.h": GDF_INT64, GDF_FLOAT32, GDF_FLOAT64, - GDF_DATE32, - GDF_DATE64, - GDF_TIMESTAMP, + GDF_DATE32, + GDF_DATE64, + GDF_TIMESTAMP, GDF_CATEGORY, GDF_STRING, N_GDF_TYPES @@ -24,18 +24,18 @@ cdef extern from "cudf.h": ctypedef size_t gdf_size_type struct gdf_column_: - void *data + void *data gdf_valid_type *valid - gdf_size_type size + gdf_size_type size gdf_dtype dtype gdf_size_type null_count ctypedef gdf_column_ gdf_column - cdef gdf_error gdf_column_view_augmented(gdf_column *column, - void *data, + cdef gdf_error gdf_column_view_augmented(gdf_column *column, + void *data, gdf_valid_type *valid, - gdf_size_type size, + gdf_size_type size, gdf_dtype dtype, gdf_size_type null_count) @@ -60,13 +60,13 @@ cdef extern from "cugraph.h": gdf_adj_list *transposedAdjList - cdef gdf_error gdf_edge_list_view(gdf_graph *graph, + cdef gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *source_indices, const gdf_column *destination_indices, const gdf_column *edge_data) cdef gdf_error gdf_add_edge_list(gdf_graph *graph) cdef gdf_error gdf_delete_edge_list(gdf_graph *graph) - cdef gdf_error gdf_adj_list_view (gdf_graph *graph, + cdef gdf_error gdf_adj_list_view (gdf_graph *graph, const gdf_column *offsets, const gdf_column *indices, const gdf_column *edge_data) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 895010d56ae..fb035ab1b32 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -132,12 +132,13 @@ class Graph: src_data = rmm.device_array_from_ptr(src_col_data, nelem=col_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(src_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(src_col_data, 0)) dest_data = rmm.device_array_from_ptr(dest_col_data, nelem=col_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(dest_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(dest_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(src_data), cudf.Series(dest_data) @@ -185,12 +186,13 @@ class Graph: offsets_data = rmm.device_array_from_ptr(offsets_col_data, nelem=col_size_off, - dtype=np.int32, - finalizer=rmm._make_finalizer(offsets_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(offsets_col_data, 0)) indices_data = rmm.device_array_from_ptr(indices_col_data, nelem=col_size_ind, - dtype=np.int32, - finalizer=rmm._make_finalizer(indices_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(indices_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(offsets_data), cudf.Series(indices_data) @@ -212,12 +214,13 @@ class Graph: offsets_data = rmm.device_array_from_ptr(offsets_col_data, nelem=off_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(offsets_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(offsets_col_data, 0)) indices_data = rmm.device_array_from_ptr(indices_col_data, nelem=ind_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(indices_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(indices_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(offsets_data), cudf.Series(indices_data) diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index b884fba3298..f28cf0084f4 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -114,7 +114,7 @@ def test_transpose_from_adj_list(graph_file): G.add_transpose() Mt = M.transpose().tocsr() toff, tind = G.view_transpose_adj_list() - assert compare_series(Mt.indices, tind) + assert compare_series(tind, Mt.indices) assert compare_offsets(toff, Mt.indptr) From 0b9f2ff58494498023aba28fc0e84c4e10d2daed Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:10:26 -0800 Subject: [PATCH 06/26] fixed memory leaks in class Graph --- python/cugraph/graph/c_graph.pyx | 68 ++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index fb035ab1b32..2b7cd3da9f6 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -19,11 +19,11 @@ import cudf from librmm_cffi import librmm as rmm import numpy as np + dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64} + cdef create_column(col): - - x = < gdf_column *> malloc(sizeof(gdf_column)) cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column)) cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) @@ -39,6 +39,14 @@ cdef create_column(col): cdef uintptr_t col_ptr = < uintptr_t > c_col return col_ptr + +cdef delete_column(col_ptr): + cdef uintptr_t col = col_ptr + cdef gdf_column * c_col = < gdf_column *> col + free(c_col) + return + + class Graph: """ cuGraph graph class containing basic graph creation and transformation operations. @@ -53,12 +61,22 @@ class Graph: >>> import cuGraph >>> G = cuGraph.Graph() """ - cdef gdf_graph * graph - graph = < gdf_graph *> calloc(1, sizeof(gdf_graph)) + print("Invoking __init__") + cdef gdf_graph * g + g = < gdf_graph *> calloc(1, sizeof(gdf_graph)) - cdef uintptr_t graph_ptr = < uintptr_t > graph + cdef uintptr_t graph_ptr = < uintptr_t > g self.graph_ptr = graph_ptr + def __del__(self): + print("Invoking __dealloc__") + cdef uintptr_t graph = self.graph_ptr + cdef gdf_graph * g = < gdf_graph *> graph + self.delete_edge_list() + self.delete_adj_list() + self.delete_transpose() + free(g) + def add_edge_list(self, source_col, dest_col, value_col=None): """ Wrap existing gdf columns representing an edge list in a gdf_graph. cuGraph @@ -100,18 +118,24 @@ class Graph: else: value = create_column(value_col) - err = gdf_edge_list_view(< gdf_graph *> graph, - < gdf_column *> source, - < gdf_column *> dest, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) - + try: + err = gdf_edge_list_view(< gdf_graph *> graph, + < gdf_column *> source, + < gdf_column *> dest, + < gdf_column *> value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + finally: + delete_column(source) + delete_column(dest) + if value is not 0: + delete_column(value) + def num_vertices(self): """ Get the number of vertices in the graph """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = graph + cdef gdf_graph* g = < gdf_graph *> graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) return g.adjList.offsets.size - 1 @@ -162,12 +186,18 @@ class Graph: value = 0 else: value = create_column(value_col) - - err = gdf_adj_list_view(< gdf_graph *> graph, - < gdf_column *> offsets, - < gdf_column *> indices, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) + + try: + err = gdf_adj_list_view(< gdf_graph *> graph, + < gdf_column *> offsets, + < gdf_column *> indices, + < gdf_column *> value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + finally: + delete_column(offsets) + delete_column(indices) + if value is not 0: + delete_column(value) def view_adj_list(self): """ @@ -248,5 +278,3 @@ class Graph: cdef uintptr_t graph = self.graph_ptr err = gdf_delete_transpose(< gdf_graph *> graph) cudf.bindings.cudf_cpp.check_gdf_error(err) - - From 169b6fe89d1d10824f9bf2bfee165319d26363d1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:12:40 -0800 Subject: [PATCH 07/26] modified test_grmat.py to execute a test case in pytest (this just tests R-mat graph generation successfully returns, does not really validate that the output graph is a valid R-mat graph) --- python/cugraph/grmat/test_grmat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cugraph/grmat/test_grmat.py b/python/cugraph/grmat/test_grmat.py index 0db1ef816d5..dd458550dd3 100644 --- a/python/cugraph/grmat/test_grmat.py +++ b/python/cugraph/grmat/test_grmat.py @@ -14,5 +14,7 @@ import cugraph -vertices, edges, sources, destinations = cugraph.grmat_gen( - 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') +def test_grmat_gen(): + vertices, edges, sources, destinations = cugraph.grmat_gen( + 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized' + ' --quiet') From 35c7ff629a69666079cb8de401672b806ea20eee Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:13:47 -0800 Subject: [PATCH 08/26] set the seed value for a random number generator to eliminate non-determinism in tests --- python/cugraph/spectral_clustering/test_balanced_cut.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 8a719eefcc6..54ce7585639 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -33,6 +33,7 @@ def cugraph_call(G, partitions): def random_call(G, partitions): + random.seed(0) num_verts = G.num_vertices() assignment = [] for i in range(num_verts): From d1f460e54a76645e6aaaae5079f570192a26e51c Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:37:39 -0800 Subject: [PATCH 09/26] fixed a problem in gdf_col_delete (accessing memory after deallocation is dangerous) --- cpp/src/cugraph.cu | 13 ++++++++++++- cpp/src/tests/test_utils.h | 13 ++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 2cb144db135..c95b347a814 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -29,9 +29,20 @@ void gdf_col_delete(gdf_column* col) { { ALLOC_FREE_TRY(col->data, nullptr); } +#if 1 +// If delete col is executed, the memory pointed by col is no longer valid and +// can be used in another memory allocation, so executing col->data = nullptr +// after delete col is dangerous, also, col = nullptr has no effect here (the +// address is passed by value, for col = nullptr should work, the input +// parameter should be gdf_column*& col (or alternatively, gdf_column** col and +// *col = nullptr also work) + col->data = nullptr; + delete col; +#else delete col; col->data = nullptr; - col = nullptr; + col = nullptr; +#endif } } diff --git a/cpp/src/tests/test_utils.h b/cpp/src/tests/test_utils.h index 881c848c45e..26ec576b3c5 100644 --- a/cpp/src/tests/test_utils.h +++ b/cpp/src/tests/test_utils.h @@ -663,8 +663,19 @@ void gdf_col_delete(gdf_column* col) { cudaStream_t stream{nullptr}; if(col->data) ALLOC_FREE_TRY(col->data, stream); +#if 1 +// If delete col is executed, the memory pointed by col is no longer valid and +// can be used in another memory allocation, so executing col->data = nullptr +// after delete col is dangerous, also, col = nullptr has no effect here (the +// address is passed by value, for col = nullptr should work, the input +// parameter should be gdf_column*& col (or alternatively, gdf_column** col and +// *col = nullptr also work) + col->data = nullptr; + delete col; +#else delete col; col->data = nullptr; - col = nullptr; + col = nullptr; +#endif } } From a4840f8dda02f82b19917d8b1b128781c816b782 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:48:36 -0800 Subject: [PATCH 10/26] additional flake8 fixes --- python/cugraph/graph/test_graph.py | 3 ++- python/setup.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index f28cf0084f4..f773b4a48e2 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -38,7 +38,8 @@ def compare_series(series_1, series_2): def compare_offsets(offset0, offset1): if not (len(offset0) <= len(offset1)): - print("Mismatched length: " + str(len(offset0)) + " != " + str(len(offset1))) + print("Mismatched length: " + str(len(offset0)) + " != " + + str(len(offset1))) return False for i in range(len(offset0)): if offset0[i] != offset1[i]: diff --git a/python/setup.py b/python/setup.py index 0df50e480c1..94e6358e930 100644 --- a/python/setup.py +++ b/python/setup.py @@ -122,5 +122,4 @@ def locate_nvgraph(): install_requires=INSTALL_REQUIRES, license="Apache", cmdclass=versioneer.get_cmdclass(), - zip_safe=False - ) + zip_safe=False) From 21679807344b0c2dba3b4b06e277bb73a6ff806a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 08:43:04 -0800 Subject: [PATCH 11/26] update comments on disabling free on un-owned memory --- python/cugraph/graph/c_graph.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 2b7cd3da9f6..a74d2619062 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -162,7 +162,9 @@ class Graph: nelem=col_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(dest_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.edgeList.src_indices.data and g.edgeList.dest_indices.data are not + # owned by this instance, so should not be freed here (this will lead + # to double free, and undefined behavior). return cudf.Series(src_data), cudf.Series(dest_data) @@ -222,7 +224,9 @@ class Graph: nelem=col_size_ind, dtype=np.int32) # , # finalizer=rmm._make_finalizer(indices_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.adjList.offsets.data and g.adjList.indices.data are not owned by + # this instance, so should not be freed here (this will lead to double + # free, and undefined behavior). return cudf.Series(offsets_data), cudf.Series(indices_data) @@ -250,7 +254,9 @@ class Graph: nelem=ind_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(indices_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data + # are not owned by this instance, so should not be freed here (this + # will lead to double free, and undefined behavior). return cudf.Series(offsets_data), cudf.Series(indices_data) From d29186ac62fd636ff3211a79f13690fca1a0c550 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 08:52:50 -0800 Subject: [PATCH 12/26] CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 952cbfa9bd3..530c9167f33 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,11 +21,13 @@ - PR #95 Code clean up - PR #96 Relocated mmio.c and mmio.h (external files) to thirdparty/mmio - PR #97 Updated python tests to speed them up +- PR #105 Updated ptyhton code to follow PEP8 (fixed flake8 complaints) ## Bug Fixes - PR #48 ABI Fixes - PR #72 Bug fix for segfault issue getting transpose from adjacency list +- PR #105 Bug fix for memory leaks and python test failures # cuGraph 0.5.0 (28 Jan 2019) From af9d9663217035972f523d4d416c9b5be26c5dbf Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 15:54:10 -0800 Subject: [PATCH 13/26] fixed flake8 warnings and errors --- python/cugraph/bfs/test_bfs.py | 51 ++++++++++-------- python/cugraph/graph/test_graph.py | 51 ++++++++++-------- python/cugraph/grmat/test_grmat.py | 4 +- python/cugraph/jaccard/test_jaccard.py | 53 ++++++++++--------- python/cugraph/jaccard/test_wjaccard.py | 42 +++++++-------- python/cugraph/louvain/test_louvain.py | 35 ++++++------ python/cugraph/pagerank/test_pagerank.py | 32 +++++------ .../spectral_clustering/test_balanced_cut.py | 32 ++++++----- .../spectral_clustering/test_modularity.py | 30 ++++++----- python/cugraph/sssp/test_sssp.py | 2 +- python/setup.py | 41 ++++++++------ 11 files changed, 202 insertions(+), 171 deletions(-) diff --git a/python/cugraph/bfs/test_bfs.py b/python/cugraph/bfs/test_bfs.py index e75daa8b855..07cedd4fa2e 100644 --- a/python/cugraph/bfs/test_bfs.py +++ b/python/cugraph/bfs/test_bfs.py @@ -11,41 +11,44 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import time -from scipy.io import mmread import pytest +import queue +import time import numpy as np +from scipy.io import mmread +import cudf +import cugraph + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() -def cugraph_Call(M, start_vertex): - +def cugraph_call(M, start_vertex): # Device data M = M.tocsr() sources = cudf.Series(M.indptr) destinations = cudf.Series(M.indices) values = cudf.Series(M.data) - + G = cugraph.Graph() G.add_adj_list(sources, destinations, values) - + t1 = time.time() df = cugraph.bfs(G, start_vertex) t2 = time.time() - t1 print('Time : '+str(t2)) - + # Return distances as np.array() return df['vertex'].to_array(), df['distance'].to_array() -def base_Call(M, start_vertex): - intMax = 2147483647 +def base_call(M, start_vertex): + int_max = 2**31 - 1 + M = M.tocsr() + offsets = M.indptr indices = M.indices num_verts = len(offsets) - 1 @@ -53,33 +56,35 @@ def base_Call(M, start_vertex): vertex = list(range(num_verts)) for i in range(num_verts): - dist[i] = intMax - import queue + dist[i] = int_max + q = queue.Queue() q.put(start_vertex) dist[start_vertex] = 0 while(not q.empty()): u = q.get() - for iCol in range(offsets[u],offsets[u + 1]): + for iCol in range(offsets[u], offsets[u + 1]): v = indices[iCol] - if (dist[v] == intMax): + if (dist[v] == int_max): dist[v] = dist[u] + 1 q.put(v) + return vertex, dist + datasets = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/polbooks.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + @pytest.mark.parametrize('graph_file', datasets) def test_bfs(graph_file): - M = ReadMtxFile(graph_file) - base_v_id, base_dist = base_Call(M, 0) - v_id, dist = cugraph_Call(M, 0) - - assert len(base_dist) == len(dist) - for i in range(len(dist)): - assert base_v_id[i] == v_id[i] - assert base_dist[i] == dist[i] + base_vid, base_dist = base_call(M, 0) + cugraph_vid, cugraph_dist = cugraph_call(M, 0) + + assert len(base_dist) == len(cugraph_dist) + for i in range(len(cugraph_dist)): + assert base_vid[i] == cugraph_vid[i] + assert base_dist[i] == cugraph_dist[i] diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index 7b9097e76c2..53c5a02c113 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -17,34 +17,41 @@ import numpy as np from scipy.io import mmread + def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() + def compare_series(series_1, series_2): if (len(series_1) != len(series_2)): print("Series do not match in length") return 0 for i in range(len(series_1)): if(series_1[i] != series_2[i]): - print("Series[" + str(i) + "] does not match, " + str(series_1[i]) + ", " + str(series_2[i])) + print("Series[" + str(i) + "] does not match, " + str(series_1[i]) + + ", " + str(series_2[i])) return 0 return 1 -def compareOffsets(cu, np): + +def compare_offsets(cu, np): if not (len(cu) <= len(np)): print("Mismatched length: " + str(len(cu)) + " != " + str(len(np))) return False for i in range(len(cu)): if cu[i] != np[i]: - print("Series[" + str(i) + "]: " + str(cu[i]) + " != " + str(np[i])) + print("Series[" + str(i) + "]: " + str(cu[i]) + " != " + + str(np[i])) return False return True -datasets = ['/datasets/networks/karate.mtx', - '/datasets/networks/dolphins.mtx', + +datasets = ['/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] + @pytest.mark.parametrize('graph_file', datasets) def test_add_edge_list_to_adj_list(graph_file): @@ -53,30 +60,31 @@ def test_add_edge_list_to_adj_list(graph_file): destinations = cudf.Series(M.col) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets_exp = M.indptr indices_exp = M.indices # cugraph add_egde_list to_adj_list call G = cugraph.Graph() - G.add_edge_list(sources,destinations, None) + G.add_edge_list(sources, destinations, None) offsets_cu, indices_cu = G.view_adj_list() - assert compareOffsets(offsets_cu, offsets_exp) + assert compare_offsets(offsets_cu, offsets_exp) assert compare_series(indices_cu, indices_exp) + @pytest.mark.parametrize('graph_file', datasets) def test_add_adj_list_to_edge_list(graph_file): M = ReadMtxFile(graph_file) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -92,9 +100,10 @@ def test_add_adj_list_to_edge_list(graph_file): destinations_cu = np.array(destinations) assert compare_series(sources_cu, sources_exp) assert compare_series(destinations_cu, destinations_exp) - + + @pytest.mark.parametrize('graph_file', datasets) -def test_transpose_from_adj_list(graph_file): +def test_transpose_from_adj_list(graph_file): M = ReadMtxFile(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) @@ -105,8 +114,9 @@ def test_transpose_from_adj_list(graph_file): Mt = M.transpose().tocsr() toff, tind = G.view_transpose_adj_list() assert compare_series(Mt.indices, tind) - assert compareOffsets(toff, Mt.indptr) - + assert compare_offsets(toff, Mt.indptr) + + @pytest.mark.parametrize('graph_file', datasets) def test_view_edge_list_from_adj_list(graph_file): M = ReadMtxFile(graph_file) @@ -121,7 +131,8 @@ def test_view_edge_list_from_adj_list(graph_file): dst1 = M.col assert compare_series(src1, src2) assert compare_series(dst1, dst2) - + + @pytest.mark.parametrize('graph_file', datasets) def test_delete_edge_list_delete_adj_list(graph_file): M = ReadMtxFile(graph_file) @@ -129,11 +140,11 @@ def test_delete_edge_list_delete_adj_list(graph_file): destinations = cudf.Series(M.col) M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - + offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -150,5 +161,3 @@ def test_delete_edge_list_delete_adj_list(graph_file): with pytest.raises(cudf.bindings.GDFError.GDFError) as excinfo: G.view_edge_list() assert excinfo.value.errcode.decode() == 'GDF_INVALID_API_CALL' - - diff --git a/python/cugraph/grmat/test_grmat.py b/python/cugraph/grmat/test_grmat.py index 3d3933b6191..0db1ef816d5 100644 --- a/python/cugraph/grmat/test_grmat.py +++ b/python/cugraph/grmat/test_grmat.py @@ -12,5 +12,7 @@ # limitations under the License. import cugraph -vertices, edges, sources, destinations = cugraph.grmat_gen('grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') + +vertices, edges, sources, destinations = cugraph.grmat_gen( + 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index 690a767245b..5e049fdc8ee 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -13,44 +13,43 @@ import cugraph import cudf -import numpy as np -import sys import time from scipy.io import mmread import networkx as nx -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) - + G = cugraph.Graph() - G.add_adj_list(row_offsets,col_indices,None) + G.add_adj_list(row_offsets, col_indices, None) # cugraph Jaccard Call t1 = time.time() df = cugraph.nvJaccard(G) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return df['source'].to_array(), df['destination'].to_array(), df['jaccard_coeff'].to_array() + + def networkx_Call(M): M = M.tocsr() @@ -59,8 +58,9 @@ def networkx_Call(M): destinations = M.col edges = [] for i in range(len(sources)): - edges.append((sources[i],destinations[i])) - # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly + edges.append((sources[i], destinations[i])) + # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this + # explicitly print('Format conversion ... ') # Directed NetworkX graph @@ -71,39 +71,40 @@ def networkx_Call(M): print('Solving... ') t1 = time.time() preds = nx.jaccard_coefficient(Gnx, edges) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) - coeff = [] src = [] dst = [] - for u,v,p in preds: + coeff = [] + for u, v, p in preds: src.append(u) dst.append(v) coeff.append(p) return src, dst, coeff - -datasets = ['/datasets/networks/dolphins.mtx', - '/datasets/networks/karate.mtx' , + +datasets = ['/datasets/networks/dolphins.mtx', + '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_jaccard(graph_file): M = ReadMtxFile(graph_file) cu_src, cu_dst, cu_coeff = cuGraph_Call(M) nx_src, nx_dst, nx_coeff = networkx_Call(M) + # Calculating mismatch err = 0 tol = 1.0e-06 + assert len(cu_coeff) == len(nx_coeff) for i in range(len(cu_coeff)): - if(abs(cu_coeff[i] -nx_coeff[i])>tol*1.1 and cu_src == nx_src and cu_dst == nx_dst): - err+=1 - print("Mismatches: %d" %err) - assert err == 0 - - + if(abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1 and cu_src == nx_src + and cu_dst == nx_dst): + err += 1 + print("Mismatches: %d" % err) + assert err == 0 diff --git a/python/cugraph/jaccard/test_wjaccard.py b/python/cugraph/jaccard/test_wjaccard.py index bda9000863d..ac21a85e36e 100644 --- a/python/cugraph/jaccard/test_wjaccard.py +++ b/python/cugraph/jaccard/test_wjaccard.py @@ -14,58 +14,56 @@ import cugraph import cudf import numpy as np -import sys import time from scipy.io import mmread import networkx as nx -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Callw(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) - #values = cudf.Series(np.ones(len(col_indices), dtype = np.float32), nan_as_null = False) - weights_arr = cudf.Series(np.ones(len(row_offsets), dtype = np.float32), nan_as_null = False) - + # values = cudf.Series(np.ones(len(col_indices), dtype=np.float32), + # nan_as_null=False) + weights_arr = cudf.Series(np.ones(len(row_offsets), dtype=np.float32), + nan_as_null=False) + G = cugraph.Graph() - G.add_adj_list(row_offsets,col_indices,None) + G.add_adj_list(row_offsets, col_indices, None) # cugraph Jaccard Call t1 = time.time() df = cugraph.nvJaccard_w(G, weights_arr) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return df['jaccard_coeff'] - -datasets = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] +datasets = ['/datasets/networks/dolphins.mtx', + '/datasets/networks/karate.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_wjaccard(graph_file): M = ReadMtxFile(graph_file) - cu_coeff = cuGraph_Callw(M) - - # no NetworkX equivalent to compare against... - - - - + # suppress F841 (local variable is assigned but never used) in flake8 + # no networkX equivalent to compare cu_coeff against... + cu_coeff = cuGraph_Callw(M) # noqa: F841 + # this test is incomplete... diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index 30262c38a91..bbcae88b4f3 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -13,77 +13,76 @@ import cugraph import cudf -import numpy as np -import sys import time from scipy.io import mmread import networkx as nx import community -import os import pytest -print ('Networkx version : {} '.format(nx.__version__)) +print('Networkx version : {} '.format(nx.__version__)) def ReadMtxFile(mmFile): - print('Reading '+ str(mmFile) + '...') + print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(M): M = M.tocsr() - if M is None : + if M is None: raise TypeError('Could not read the input graph') if M.shape[0] != M.shape[1]: raise TypeError('Shape is not square') - #Device data + # Device data row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) G = cugraph.Graph() - G.add_adj_list(row_offsets, col_indices, values) + G.add_adj_list(row_offsets, col_indices, values) # cugraph Louvain Call t1 = time.time() parts, mod = cugraph.nvLouvain(G) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return parts, mod + def networkx_Call(M): M = M.tocsr() # Directed NetworkX graph Gnx = nx.Graph(M) - #z = {k: 1.0/M.shape[0] for k in range(M.shape[0])} + # z = {k: 1.0/M.shape[0] for k in range(M.shape[0])} # Networkx Jaccard Call print('Solving... ') t1 = time.time() parts = community.best_partition(Gnx) - t2 = time.time() - t1 + t2 = time.time() - t1 print('Time : '+str(t2)) return parts - -datasets = ['/datasets/networks/karate.mtx', - '/datasets/networks/dolphins.mtx', + +datasets = ['/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', datasets) def test_louvain(graph_file): M = ReadMtxFile(graph_file) cu_parts, cu_mod = cuGraph_Call(M) - nx_parts = networkx_Call(M) + nx_parts = networkx_Call(M) + # Calculating modularity scores for comparison Gnx = nx.Graph(M) - cu_map = {0:0} + cu_map = {0: 0} for i in range(len(cu_parts)): cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i] assert set(nx_parts.keys()) == set(cu_map.keys()) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index eaf6fdaa73f..5620689ec1c 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -11,12 +11,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf +import pytest import time -from scipy.io import mmread import networkx as nx -import pytest +from scipy.io import mmread +import cudf +import cugraph print('Networkx version : {} '.format(nx.__version__)) @@ -27,12 +27,11 @@ def ReadMtxFile(mmFile): def cugraph_Call(M, max_iter, tol, alpha): - # Device data sources = cudf.Series(M.row) destinations = cudf.Series(M.col) # values = cudf.Series(np.ones(len(sources), dtype = np.float64)) - + # cugraph Pagerank Call G = cugraph.Graph() G.add_edge_list(sources, destinations, None) @@ -51,7 +50,6 @@ def cugraph_Call(M, max_iter, tol, alpha): def networkx_Call(M, max_iter, tol, alpha): - nnz_per_row = {r: 0 for r in range(M.get_shape()[0])} for nnz in range(M.getnnz()): nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] @@ -97,25 +95,27 @@ def networkx_Call(M, max_iter, tol, alpha): '/datasets/networks/karate.mtx' , '/datasets/networks/netscience.mtx'] -Max_Iterations = [500] +max_iterations = [500] tolerance = [1.0e-06] alpha = [0.85] @pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('max_iter', Max_Iterations) +@pytest.mark.parametrize('max_iter', max_iterations) @pytest.mark.parametrize('tol', tolerance) @pytest.mark.parametrize('alpha', alpha) def test_pagerank(graph_file, max_iter, tol, alpha): - M = ReadMtxFile(graph_file) - sorted_pr = cugraph_Call(M, max_iter, tol, alpha) - items = networkx_Call(M, max_iter, tol, alpha) + + networkx_pr = networkx_Call(M, max_iter, tol, alpha) + cugraph_pr = cugraph_Call(M, max_iter, tol, alpha) + # Calculating mismatch + err = 0 - # assert len(sorted_pr) == len(items) - for i in range(len(sorted_pr)): - if(abs(sorted_pr[i][1]-items[i][1]) > tol*1.1 and sorted_pr[i][0] == items[i][0]): + assert len(cugraph_pr) == len(networkx_pr) + for i in range(len(cugraph_pr)): + if(abs(cugraph_pr[i][1]-networkx_pr[i][1]) > tol*1.1 and sorted_ptr[i][0] == networkx_pr[i][0]): err = err + 1 print(err) - assert err < (0.01*len(sorted_pr)) + assert err < (0.01*len(cugraph_pr)) diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 209b4c33bd7..90d792e5b96 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -13,37 +13,40 @@ import cugraph import cudf -import numpy as np -import sys -import time from scipy.io import mmread -import community -import os import pytest import random + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(G, partitions): - df = cugraph.spectralBalancedCutClustering(G, partitions, num_eigen_vects=partitions) + df = cugraph.spectralBalancedCutClustering(G, partitions, + num_eigen_vects=partitions) score = cugraph.analyzeClustering_edge_cut(G, partitions, df['cluster']) return set(df['vertex'].to_array()), score + def random_Call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): - assignment.append(random.randint(0,partitions-1)) + assignment.append(random.randint(0, partitions-1)) assignment_cu = cudf.Series(assignment) score = cugraph.analyzeClustering_edge_cut(G, partitions, assignment_cu) return set(range(num_verts)), score -datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] +datasets = [ + '/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] partitions = [2, 4, 8] + + @pytest.mark.parametrize('graph_file', datasets) @pytest.mark.parametrize('partitions', partitions) def test_modularityClustering(graph_file, partitions): @@ -54,10 +57,11 @@ def test_modularityClustering(graph_file, partitions): values = cudf.Series(M.data) G = cugraph.Graph() G.add_adj_list(row_offsets, col_indices, values) - + # Get the modularity score for partitioning versus random assignment - cu_v_id, cu_score = cuGraph_Call(G, partitions) - v_id, rand_score = random_Call(G, partitions) - assert cu_v_id == v_id - # Assert that the partitioning has better modularity than the random assignment + cu_vid, cu_score = cuGraph_Call(G, partitions) + rand_vid, rand_score = random_Call(G, partitions) + assert cu_vid == rand_vid + # Assert that the partitioning has better modularity than the random + # assignment assert cu_score < rand_score diff --git a/python/cugraph/spectral_clustering/test_modularity.py b/python/cugraph/spectral_clustering/test_modularity.py index 0238be2a194..f39ff838af5 100644 --- a/python/cugraph/spectral_clustering/test_modularity.py +++ b/python/cugraph/spectral_clustering/test_modularity.py @@ -13,37 +13,40 @@ import cugraph import cudf -import numpy as np -import sys -import time from scipy.io import mmread -import community -import os import pytest import random + def ReadMtxFile(mmFile): print('Reading ' + str(mmFile) + '...') return mmread(mmFile).asfptype() - + def cuGraph_Call(G, partitions): - df = cugraph.spectralModularityMaximizationClustering(G, partitions, num_eigen_vects=(partitions - 1)) + df = cugraph.spectralModularityMaximizationClustering( + G, partitions, num_eigen_vects=(partitions - 1)) score = cugraph.analyzeClustering_modularity(G, partitions, df['cluster']) return score + def random_Call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): - assignment.append(random.randint(0,partitions-1)) + assignment.append(random.randint(0, partitions-1)) assignment_cu = cudf.Series(assignment) score = cugraph.analyzeClustering_modularity(G, partitions, assignment_cu) return score - -datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] + +datasets = [ + '/datasets/networks/karate.mtx', + '/datasets/networks/dolphins.mtx', + '/datasets/golden_data/graphs/dblp.mtx'] partitions = [2, 4, 8] + + @pytest.mark.parametrize('graph_file', datasets) @pytest.mark.parametrize('partitions', partitions) def test_modularityClustering(graph_file, partitions): @@ -54,10 +57,11 @@ def test_modularityClustering(graph_file, partitions): values = cudf.Series(M.data) G = cugraph.Graph() G.add_adj_list(row_offsets, col_indices, values) - + # Get the modularity score for partitioning versus random assignment cu_score = cuGraph_Call(G, partitions) rand_score = random_Call(G, partitions) - - # Assert that the partitioning has better modularity than the random assignment + + # Assert that the partitioning has better modularity than the random + # assignment assert cu_score > rand_score diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index 6c3cb279804..69b560339aa 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -71,7 +71,7 @@ def networkx_Call(M, source): print('NX Solving... ') t1 = time.time() - path=nx.single_source_shortest_path(Gnx, source) + path = nx.single_source_shortest_path(Gnx, source) t2 = time.time() - t1 diff --git a/python/setup.py b/python/setup.py index d56904c1509..886d4194dfd 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,6 +1,6 @@ # Copyright (c) 2018, NVIDIA CORPORATION. -from setuptools import setup, find_packages +from setuptools import setup from setuptools.extension import Extension from Cython.Build import cythonize import numpy @@ -11,20 +11,20 @@ import os import sys -install_requires = [ - 'numba', - 'cython' -] +install_requires = ['numba', 'cython'] + def find_in_path(name, path): "Find a file in a search path" - #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ + # adapted fom + # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path for dir in path.split(os.pathsep): binpath = pjoin(dir, name) if os.path.exists(binpath): return os.path.abspath(binpath) return None + def locate_cuda(): """Locate the CUDA environment on the system Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' @@ -41,30 +41,39 @@ def locate_cuda(): # otherwise, search the PATH for NVCC nvcc = find_in_path('nvcc', os.environ['PATH']) if nvcc is None: - raise EnvironmentError('The nvcc binary could not be ' - 'located in your $PATH. Either add it to your path, or set $CUDAHOME') + raise EnvironmentError( + 'The nvcc binary could not be located in your $PATH. ' + 'Either add it to your path, or set $CUDAHOME') home = os.path.dirname(os.path.dirname(nvcc)) - cudaconfig = {'home':home, 'nvcc':nvcc, + cudaconfig = {'home': home, 'nvcc': nvcc, 'include': pjoin(home, 'include'), 'lib64': pjoin(home, 'lib64')} for k, v in iter(cudaconfig.items()): if not os.path.exists(v): - raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) + raise EnvironmentError( + 'The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig + def locate_nvgraph(): if 'CONDA_PREFIX' in os.environ: - nvgraph_found = find_in_path('lib/libnvgraph_st.so', os.environ['CONDA_PREFIX']) + nvgraph_found = find_in_path('lib/libnvgraph_st.so', + os.environ['CONDA_PREFIX']) if nvgraph_found is None: - nvgraph_found = find_in_path('libnvgraph_st.so', os.environ['LD_LIBRARY_PATH']) + nvgraph_found = find_in_path('libnvgraph_st.so', + os.environ['LD_LIBRARY_PATH']) if nvgraph_found is None: - raise EnvironmentError('The nvgraph library could not be located') - nvgraph_config = {'include':pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), 'include', 'nvgraph'), - 'lib':os.path.dirname(nvgraph_found)} + raise EnvironmentError('The nvgraph library could not be located') + nvgraph_config = { + 'include': pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), + 'include', 'nvgraph'), + 'lib': os.path.dirname(nvgraph_found)} + return nvgraph_config - + + CUDA = locate_cuda() NVGRAPH = locate_nvgraph() From d867ce9771572615cf5042d6b92f79a0c5dd5474 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 22:52:18 -0800 Subject: [PATCH 14/26] fixed pylint errors and warnings (except for ones related to missing docstring, short variable names, superfluous parens, and consider using enumerates). --- python/cugraph/bfs/test_bfs.py | 23 +++++--- python/cugraph/graph/test_graph.py | 49 ++++++++-------- python/cugraph/jaccard/test_jaccard.py | 29 +++++----- python/cugraph/jaccard/test_wjaccard.py | 27 +++++---- python/cugraph/louvain/test_louvain.py | 31 +++++----- python/cugraph/pagerank/test_pagerank.py | 39 +++++++------ .../spectral_clustering/test_balanced_cut.py | 38 +++++++------ .../spectral_clustering/test_modularity.py | 38 +++++++------ python/cugraph/sssp/test_sssp.py | 32 ++++++----- python/setup.py | 57 ++++++++++--------- 10 files changed, 194 insertions(+), 169 deletions(-) diff --git a/python/cugraph/bfs/test_bfs.py b/python/cugraph/bfs/test_bfs.py index 07cedd4fa2e..f52a80ce12e 100644 --- a/python/cugraph/bfs/test_bfs.py +++ b/python/cugraph/bfs/test_bfs.py @@ -11,18 +11,20 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import queue import time + import numpy as np +import pytest from scipy.io import mmread + import cudf import cugraph -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() def cugraph_call(M, start_vertex): @@ -63,8 +65,8 @@ def base_call(M, start_vertex): dist[start_vertex] = 0 while(not q.empty()): u = q.get() - for iCol in range(offsets[u], offsets[u + 1]): - v = indices[iCol] + for i_col in range(offsets[u], offsets[u + 1]): + v = indices[i_col] if (dist[v] == int_max): dist[v] = dist[u] + 1 q.put(v) @@ -72,18 +74,21 @@ def base_call(M, start_vertex): return vertex, dist -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/polbooks.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_bfs(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) + base_vid, base_dist = base_call(M, 0) cugraph_vid, cugraph_dist = cugraph_call(M, 0) + # Calculating mismatch + assert len(base_dist) == len(cugraph_dist) for i in range(len(cugraph_dist)): assert base_vid[i] == cugraph_vid[i] diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index 53c5a02c113..b884fba3298 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -11,16 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import pytest import numpy as np +import pytest from scipy.io import mmread +import cugraph +import cudf + -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() def compare_series(series_1, series_2): @@ -35,27 +36,27 @@ def compare_series(series_1, series_2): return 1 -def compare_offsets(cu, np): - if not (len(cu) <= len(np)): - print("Mismatched length: " + str(len(cu)) + " != " + str(len(np))) +def compare_offsets(offset0, offset1): + if not (len(offset0) <= len(offset1)): + print("Mismatched length: " + str(len(offset0)) + " != " + str(len(offset1))) return False - for i in range(len(cu)): - if cu[i] != np[i]: - print("Series[" + str(i) + "]: " + str(cu[i]) + " != " - + str(np[i])) + for i in range(len(offset0)): + if offset0[i] != offset1[i]: + print("Series[" + str(i) + "]: " + str(offset0[i]) + " != " + + str(offset1[i])) return False return True -datasets = ['/datasets/networks/karate.mtx', +DATASETS = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_add_edge_list_to_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) sources = cudf.Series(M.row) destinations = cudf.Series(M.col) @@ -76,9 +77,9 @@ def test_add_edge_list_to_adj_list(graph_file): assert compare_series(indices_cu, indices_exp) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_add_adj_list_to_edge_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -102,9 +103,9 @@ def test_add_adj_list_to_edge_list(graph_file): assert compare_series(destinations_cu, destinations_exp) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_transpose_from_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -117,9 +118,9 @@ def test_transpose_from_adj_list(graph_file): assert compare_offsets(toff, Mt.indptr) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_view_edge_list_from_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) M = M.tocsr() offsets = cudf.Series(M.indptr) indices = cudf.Series(M.indices) @@ -133,9 +134,9 @@ def test_view_edge_list_from_adj_list(graph_file): assert compare_series(dst1, dst2) -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_delete_edge_list_delete_adj_list(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) sources = cudf.Series(M.row) destinations = cudf.Series(M.col) diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index 5e049fdc8ee..dec0bfb4389 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -11,22 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread + import networkx as nx import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -50,7 +53,7 @@ def cuGraph_Call(M): -def networkx_Call(M): +def networkx_call(M): M = M.tocsr() M = M.tocoo() @@ -84,17 +87,17 @@ def networkx_Call(M): return src, dst, coeff -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_jaccard(graph_file): - M = ReadMtxFile(graph_file) - cu_src, cu_dst, cu_coeff = cuGraph_Call(M) - nx_src, nx_dst, nx_coeff = networkx_Call(M) + M = read_mtx_file(graph_file) + cu_src, icu_dst, cu_coeff = cugraph_call(M) + nx_src, nx_dst, nx_coeff = networkx_call(M) # Calculating mismatch err = 0 diff --git a/python/cugraph/jaccard/test_wjaccard.py b/python/cugraph/jaccard/test_wjaccard.py index ac21a85e36e..9a4315893d5 100644 --- a/python/cugraph/jaccard/test_wjaccard.py +++ b/python/cugraph/jaccard/test_wjaccard.py @@ -11,23 +11,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -import numpy as np import time -from scipy.io import mmread + import networkx as nx +import numpy as np import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Callw(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -54,16 +57,16 @@ def cuGraph_Callw(M): return df['jaccard_coeff'] -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_wjaccard(graph_file): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) # suppress F841 (local variable is assigned but never used) in flake8 # no networkX equivalent to compare cu_coeff against... - cu_coeff = cuGraph_Callw(M) # noqa: F841 + cu_coeff = cugraph_call(M) # noqa: F841 # this test is incomplete... diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index bbcae88b4f3..1a2ad83101d 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -11,23 +11,26 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread -import networkx as nx + import community +import networkx as nx import pytest +from scipy.io import mmread + +import cudf +import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(M): +def cugraph_call(M): M = M.tocsr() if M is None: raise TypeError('Could not read the input graph') @@ -51,7 +54,7 @@ def cuGraph_Call(M): return parts, mod -def networkx_Call(M): +def networkx_call(M): M = M.tocsr() # Directed NetworkX graph @@ -69,16 +72,16 @@ def networkx_Call(M): return parts -datasets = ['/datasets/networks/karate.mtx', +DATASETS = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/networks/netscience.mtx'] -@pytest.mark.parametrize('graph_file', datasets) +@pytest.mark.parametrize('graph_file', DATASETS) def test_louvain(graph_file): - M = ReadMtxFile(graph_file) - cu_parts, cu_mod = cuGraph_Call(M) - nx_parts = networkx_Call(M) + M = read_mtx_file(graph_file) + cu_parts, cu_mod = cugraph_call(M) + nx_parts = networkx_call(M) # Calculating modularity scores for comparison Gnx = nx.Graph(M) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index 5620689ec1c..b14542400b0 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -11,22 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import pytest import time + import networkx as nx +import pytest from scipy.io import mmread + import cudf import cugraph + print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cugraph_Call(M, max_iter, tol, alpha): +def cugraph_call(M, max_iter, tol, alpha): # Device data sources = cudf.Series(M.row) destinations = cudf.Series(M.col) @@ -49,7 +52,7 @@ def cugraph_Call(M, max_iter, tol, alpha): return sorted(sorted_pr, key=lambda x: x[1], reverse=True) -def networkx_Call(M, max_iter, tol, alpha): +def networkx_call(M, max_iter, tol, alpha): nnz_per_row = {r: 0 for r in range(M.get_shape()[0])} for nnz in range(M.getnnz()): nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] @@ -91,24 +94,24 @@ def networkx_Call(M, max_iter, tol, alpha): return sorted(pr.items(), key=lambda x: x[1], reverse=True) -datasets = ['/datasets/networks/dolphins.mtx', - '/datasets/networks/karate.mtx' , +DATASETS = ['/datasets/networks/dolphins.mtx', + '/datasets/networks/karate.mtx', '/datasets/networks/netscience.mtx'] -max_iterations = [500] -tolerance = [1.0e-06] -alpha = [0.85] +MAX_ITERATIONS = [500] +TOLERANCE = [1.0e-06] +ALPHA = [0.85] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('max_iter', max_iterations) -@pytest.mark.parametrize('tol', tolerance) -@pytest.mark.parametrize('alpha', alpha) +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('max_iter', MAX_ITERATIONS) +@pytest.mark.parametrize('tol', TOLERANCE) +@pytest.mark.parametrize('alpha', ALPHA) def test_pagerank(graph_file, max_iter, tol, alpha): - M = ReadMtxFile(graph_file) + M = read_mtx_file(graph_file) - networkx_pr = networkx_Call(M, max_iter, tol, alpha) - cugraph_pr = cugraph_Call(M, max_iter, tol, alpha) + networkx_pr = networkx_call(M, max_iter, tol, alpha) + cugraph_pr = cugraph_call(M, max_iter, tol, alpha) # Calculating mismatch diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 90d792e5b96..07b80ce3373 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -11,26 +11,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -from scipy.io import mmread -import pytest import random +import pytest +from scipy.io import mmread + +import cudf +import cugraph -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(G, partitions): + +def cugraph_call(G, partitions): df = cugraph.spectralBalancedCutClustering(G, partitions, num_eigen_vects=partitions) score = cugraph.analyzeClustering_edge_cut(G, partitions, df['cluster']) return set(df['vertex'].to_array()), score -def random_Call(G, partitions): +def random_call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): @@ -40,18 +42,18 @@ def random_Call(G, partitions): return set(range(num_verts)), score -datasets = [ +DATASETS = [ '/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -partitions = [2, 4, 8] +PARTITIONS = [2, 4, 8] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('partitions', partitions) -def test_modularityClustering(graph_file, partitions): +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('partitions', PARTITIONS) +def test_modularity_clustering(graph_file, partitions): # Read in the graph and get a cugraph object - M = ReadMtxFile(graph_file).tocsr() + M = read_mtx_file(graph_file).tocsr() row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) @@ -59,9 +61,9 @@ def test_modularityClustering(graph_file, partitions): G.add_adj_list(row_offsets, col_indices, values) # Get the modularity score for partitioning versus random assignment - cu_vid, cu_score = cuGraph_Call(G, partitions) - rand_vid, rand_score = random_Call(G, partitions) - assert cu_vid == rand_vid + cu_vid, cu_score = cugraph_call(G, partitions) + rand_vid, rand_score = random_call(G, partitions) + # Assert that the partitioning has better modularity than the random # assignment assert cu_score < rand_score diff --git a/python/cugraph/spectral_clustering/test_modularity.py b/python/cugraph/spectral_clustering/test_modularity.py index f39ff838af5..5421af7640e 100644 --- a/python/cugraph/spectral_clustering/test_modularity.py +++ b/python/cugraph/spectral_clustering/test_modularity.py @@ -11,26 +11,28 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf -from scipy.io import mmread -import pytest import random +import pytest +from scipy.io import mmread + +import cudf +import cugraph + -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cuGraph_Call(G, partitions): +def cugraph_call(G, partitions): df = cugraph.spectralModularityMaximizationClustering( - G, partitions, num_eigen_vects=(partitions - 1)) + G, partitions, num_eigen_vects=(partitions - 1)) score = cugraph.analyzeClustering_modularity(G, partitions, df['cluster']) return score -def random_Call(G, partitions): +def random_call(G, partitions): num_verts = G.num_vertices() assignment = [] for i in range(num_verts): @@ -40,18 +42,18 @@ def random_Call(G, partitions): return score -datasets = [ +DATASETS = [ '/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -partitions = [2, 4, 8] +PARTITIONS = [2, 4, 8] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('partitions', partitions) -def test_modularityClustering(graph_file, partitions): +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('partitions', PARTITIONS) +def test_modularity_clustering(graph_file, partitions): # Read in the graph and get a cugraph object - M = ReadMtxFile(graph_file).tocsr() + M = read_mtx_file(graph_file).tocsr() row_offsets = cudf.Series(M.indptr) col_indices = cudf.Series(M.indices) values = cudf.Series(M.data) @@ -59,8 +61,8 @@ def test_modularityClustering(graph_file, partitions): G.add_adj_list(row_offsets, col_indices, values) # Get the modularity score for partitioning versus random assignment - cu_score = cuGraph_Call(G, partitions) - rand_score = random_Call(G, partitions) + cu_score = cugraph_call(G, partitions) + rand_score = random_call(G, partitions) # Assert that the partitioning has better modularity than the random # assignment diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index 69b560339aa..b139c76d311 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -11,23 +11,25 @@ # See the License for the specific language governing permissions and # limitations under the License. -import cugraph -import cudf import time -from scipy.io import mmread + import networkx as nx import numpy as np import pytest +from scipy.io import mmread + +import cudf +import cugraph print('Networkx version : {} '.format(nx.__version__)) -def ReadMtxFile(mmFile): - print('Reading ' + str(mmFile) + '...') - return mmread(mmFile).asfptype() +def read_mtx_file(mm_file): + print('Reading ' + str(mm_file) + '...') + return mmread(mm_file).asfptype() -def cugraph_Call(M, source): +def cugraph_call(M, source): # Device data sources = cudf.Series(M.row) @@ -56,7 +58,7 @@ def cugraph_Call(M, source): return distances -def networkx_Call(M, source): +def networkx_call(M, source): print('Format conversion ... ') M = M.tocsr() @@ -80,20 +82,20 @@ def networkx_Call(M, source): return path -datasets = ['/datasets/networks/dolphins.mtx', +DATASETS = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx'] -source = [1] +SOURCES = [1] -@pytest.mark.parametrize('graph_file', datasets) -@pytest.mark.parametrize('source', source) +@pytest.mark.parametrize('graph_file', DATASETS) +@pytest.mark.parametrize('source', SOURCES) def test_sssp(graph_file, source): - M = ReadMtxFile(graph_file) - cu_paths = cugraph_Call(M, source) - nx_paths = networkx_Call(M, source) + M = read_mtx_file(graph_file) + cu_paths = cugraph_call(M, source) + nx_paths = networkx_call(M, source) # Calculating mismatch err = 0 diff --git a/python/setup.py b/python/setup.py index 886d4194dfd..0df50e480c1 100644 --- a/python/setup.py +++ b/python/setup.py @@ -1,25 +1,26 @@ # Copyright (c) 2018, NVIDIA CORPORATION. +from distutils.sysconfig import get_python_lib +import os +from os.path import join as pjoin +import sys + from setuptools import setup from setuptools.extension import Extension from Cython.Build import cythonize import numpy - import versioneer -from distutils.sysconfig import get_python_lib -from os.path import join as pjoin -import os -import sys -install_requires = ['numba', 'cython'] + +INSTALL_REQUIRES = ['numba', 'cython'] def find_in_path(name, path): "Find a file in a search path" # adapted fom # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path - for dir in path.split(os.pathsep): - binpath = pjoin(dir, name) + for directory in path.split(os.pathsep): + binpath = pjoin(directory, name) if os.path.exists(binpath): return os.path.abspath(binpath) return None @@ -42,8 +43,8 @@ def locate_cuda(): nvcc = find_in_path('nvcc', os.environ['PATH']) if nvcc is None: raise EnvironmentError( - 'The nvcc binary could not be located in your $PATH. ' - 'Either add it to your path, or set $CUDAHOME') + 'The nvcc binary could not be located in your $PATH. ' + 'Either add it to your path, or set $CUDAHOME') home = os.path.dirname(os.path.dirname(nvcc)) cudaconfig = {'home': home, 'nvcc': nvcc, @@ -52,7 +53,7 @@ def locate_cuda(): for k, v in iter(cudaconfig.items()): if not os.path.exists(v): raise EnvironmentError( - 'The CUDA %s path could not be located in %s' % (k, v)) + 'The CUDA %s path could not be located in %s' % (k, v)) return cudaconfig @@ -78,18 +79,18 @@ def locate_nvgraph(): NVGRAPH = locate_nvgraph() try: - numpy_include = numpy.get_include() + NUMPY_INCLUDE = numpy.get_include() except AttributeError: - numpy_include = numpy.get_numpy_include() + NUMPY_INCLUDE = numpy.get_numpy_include() -cudf_include = os.path.normpath(sys.prefix) + '/include' -cython_files = ['cugraph/*.pyx'] +CUDF_INCLUDE = os.path.normpath(sys.prefix) + '/include' +CYTHON_FILES = ['cugraph/*.pyx'] -extensions = [ +EXTENSIONS = [ Extension("cugraph", - sources=cython_files, - include_dirs=[numpy_include, - cudf_include, + sources=CYTHON_FILES, + include_dirs=[NUMPY_INCLUDE, + CUDF_INCLUDE, NVGRAPH['include'], CUDA['include'], '../cpp/src', @@ -107,19 +108,19 @@ def locate_nvgraph(): description="cuGraph - GPU Graph Analytics", version=versioneer.get_version(), classifiers=[ - # "Development Status :: 4 - Beta", - "Intended Audience :: Developers", - # "Operating System :: OS Independent", - "Programming Language :: Python", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7" + # "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + # "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7" ], # Include the separately-compiled shared library author="NVIDIA Corporation", setup_requires=['cython'], - ext_modules=cythonize(extensions), - install_requires=install_requires, + ext_modules=cythonize(EXTENSIONS), + install_requires=INSTALL_REQUIRES, license="Apache", cmdclass=versioneer.get_cmdclass(), zip_safe=False - ) + ) From ee5926a722b10840144e3263ee5b63756306b228 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 28 Feb 2019 23:21:15 -0800 Subject: [PATCH 15/26] temporarily suppress deprecation warning (python 3.7) in importing networkx --- python/cugraph/jaccard/test_jaccard.py | 11 ++++++++++- python/cugraph/louvain/test_louvain.py | 13 +++++++++++-- python/cugraph/pagerank/test_pagerank.py | 11 ++++++++++- python/cugraph/sssp/test_sssp.py | 12 +++++++++++- 4 files changed, 42 insertions(+), 5 deletions(-) diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index dec0bfb4389..b44db0a8d70 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -13,13 +13,22 @@ import time -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index 1a2ad83101d..e90ba8b13e4 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -13,14 +13,23 @@ import time -import community -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, these import community and import networkx need to be +# relocated in the third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import community + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index b14542400b0..7033252e370 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -13,13 +13,22 @@ import time -import networkx as nx import pytest from scipy.io import mmread import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + print('Networkx version : {} '.format(nx.__version__)) diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py index b139c76d311..cc638597812 100644 --- a/python/cugraph/sssp/test_sssp.py +++ b/python/cugraph/sssp/test_sssp.py @@ -13,7 +13,6 @@ import time -import networkx as nx import numpy as np import pytest from scipy.io import mmread @@ -21,6 +20,17 @@ import cudf import cugraph +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + + print('Networkx version : {} '.format(nx.__version__)) From bf6a8e066f07bbd9194ac0259b74b2f571bd8a01 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 1 Mar 2019 11:51:06 -0800 Subject: [PATCH 16/26] fixed a segmentation fault error when invoking view_edge_list on an empty graph --- python/cugraph/graph/c_graph.pyx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index ff5d97b396d..895010d56ae 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -122,7 +122,9 @@ class Graph: """ cdef uintptr_t graph = self.graph_ptr cdef gdf_graph * g = < gdf_graph *> graph - gdf_add_edge_list(g) + err = gdf_add_edge_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + col_size = g.edgeList.src_indices.size cdef uintptr_t src_col_data = < uintptr_t > g.edgeList.src_indices.data @@ -171,10 +173,10 @@ class Graph: Compute the adjacency list from edge list and return offsets and indices as cudf Series. """ cdef uintptr_t graph = self.graph_ptr - err = gdf_add_adj_list(< gdf_graph *> graph) - cudf.bindings.cudf_cpp.check_gdf_error(err) - cdef gdf_graph * g = < gdf_graph *> graph + err = gdf_add_adj_list(g) + cudf.bindings.cudf_cpp.check_gdf_error(err) + col_size_off = g.adjList.offsets.size col_size_ind = g.adjList.indices.size From 8335790d33b5b15e0047b31ed772205a6b1d5339 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Fri, 1 Mar 2019 18:12:37 -0800 Subject: [PATCH 17/26] fixed a 'terminate called after throwing an instance of 'thrust::system::system_error' what(): radidx_sort: failed on 2nd step: invalid device pointer' error on test_graph.py --- cpp/include/rmm_utils.h | 2 +- cpp/include/types.h | 2 +- cpp/src/cugraph.cu | 50 +++++++++++++++--------------- python/cugraph/graph/c_graph.pxd | 22 ++++++------- python/cugraph/graph/c_graph.pyx | 27 +++++++++------- python/cugraph/graph/test_graph.py | 2 +- 6 files changed, 54 insertions(+), 51 deletions(-) diff --git a/cpp/include/rmm_utils.h b/cpp/include/rmm_utils.h index b940392cf94..12b1b988fb6 100755 --- a/cpp/include/rmm_utils.h +++ b/cpp/include/rmm_utils.h @@ -45,7 +45,7 @@ class rmm_allocator : public thrust::device_malloc_allocator ~rmm_allocator() {} private: - cudaStream_t stream; + cudaStream_t stream; }; using rmm_temp_allocator = rmm_allocator; // Use this alias for thrust::cuda::par(allocator).on(stream) diff --git a/cpp/include/types.h b/cpp/include/types.h index c9e3de8ad8d..509e035b4b6 100644 --- a/cpp/include/types.h +++ b/cpp/include/types.h @@ -94,7 +94,7 @@ struct gdf_graph{ gdf_dynamic *dynAdjList; //dynamic gdf_graph_properties *prop; gdf_graph() : edgeList(nullptr), adjList(nullptr), transposedAdjList(nullptr), dynAdjList(nullptr), prop(nullptr) {} - ~gdf_graph() { + ~gdf_graph() { if (edgeList) delete edgeList; if (adjList) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 47f9ddf527f..2cb144db135 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -32,11 +32,11 @@ void gdf_col_delete(gdf_column* col) { delete col; col->data = nullptr; col = nullptr; - } + } } void gdf_col_release(gdf_column* col) { - delete col; + delete col; } void cpy_column_view(const gdf_column *in, gdf_column *out) { @@ -284,8 +284,8 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph, gdf_error gdf_add_adj_list(gdf_graph *graph) { - if (graph->adjList != nullptr) - return GDF_SUCCESS; + if (graph->adjList != nullptr) + return GDF_SUCCESS; GDF_REQUIRE( graph->edgeList != nullptr , GDF_INVALID_API_CALL); GDF_REQUIRE( graph->adjList == nullptr , GDF_INVALID_API_CALL); @@ -304,8 +304,8 @@ gdf_error gdf_add_adj_list(gdf_graph *graph) gdf_error gdf_add_transpose(gdf_graph *graph) { - if (graph->edgeList == nullptr) - gdf_add_edge_list(graph); + if (graph->edgeList == nullptr) + gdf_add_edge_list(graph); if (graph->edgeList->edge_data != nullptr) { switch (graph->edgeList->edge_data->dtype) { case GDF_FLOAT32: return gdf_add_transpose_impl(graph); @@ -349,23 +349,23 @@ gdf_error gdf_pagerank(gdf_graph *graph, gdf_column *pagerank, float alpha, floa } gdf_error gdf_bfs(gdf_graph *graph, gdf_column *distances, gdf_column *predecessors, int start_node, bool directed) { - GDF_REQUIRE(graph->adjList != nullptr, GDF_VALIDITY_UNSUPPORTED); - GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); - - int n = graph->adjList->offsets->size - 1; - int e = graph->adjList->indices->size; - int* offsets_ptr = (int*)graph->adjList->offsets->data; - int* indices_ptr = (int*)graph->adjList->indices->data; - int* distances_ptr = (int*)distances->data; - int* predecessors_ptr = (int*)predecessors->data; - int alpha = 15; - int beta = 18; - - cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta); - bfs.configure(distances_ptr, predecessors_ptr, nullptr); - bfs.traverse(start_node); - return GDF_SUCCESS; + GDF_REQUIRE(graph->adjList != nullptr, GDF_VALIDITY_UNSUPPORTED); + GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE); + + int n = graph->adjList->offsets->size - 1; + int e = graph->adjList->indices->size; + int* offsets_ptr = (int*)graph->adjList->offsets->data; + int* indices_ptr = (int*)graph->adjList->indices->data; + int* distances_ptr = (int*)distances->data; + int* predecessors_ptr = (int*)predecessors->data; + int alpha = 15; + int beta = 18; + + cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta); + bfs.configure(distances_ptr, predecessors_ptr, nullptr); + bfs.traverse(start_node); + return GDF_SUCCESS; } diff --git a/python/cugraph/graph/c_graph.pxd b/python/cugraph/graph/c_graph.pxd index 5219813498d..025761b827b 100755 --- a/python/cugraph/graph/c_graph.pxd +++ b/python/cugraph/graph/c_graph.pxd @@ -2,7 +2,7 @@ from libcpp cimport bool cdef extern from "cudf.h": - ctypedef enum gdf_error: + ctypedef enum gdf_error: pass ctypedef enum gdf_dtype: @@ -13,9 +13,9 @@ cdef extern from "cudf.h": GDF_INT64, GDF_FLOAT32, GDF_FLOAT64, - GDF_DATE32, - GDF_DATE64, - GDF_TIMESTAMP, + GDF_DATE32, + GDF_DATE64, + GDF_TIMESTAMP, GDF_CATEGORY, GDF_STRING, N_GDF_TYPES @@ -24,18 +24,18 @@ cdef extern from "cudf.h": ctypedef size_t gdf_size_type struct gdf_column_: - void *data + void *data gdf_valid_type *valid - gdf_size_type size + gdf_size_type size gdf_dtype dtype gdf_size_type null_count ctypedef gdf_column_ gdf_column - cdef gdf_error gdf_column_view_augmented(gdf_column *column, - void *data, + cdef gdf_error gdf_column_view_augmented(gdf_column *column, + void *data, gdf_valid_type *valid, - gdf_size_type size, + gdf_size_type size, gdf_dtype dtype, gdf_size_type null_count) @@ -60,13 +60,13 @@ cdef extern from "cugraph.h": gdf_adj_list *transposedAdjList - cdef gdf_error gdf_edge_list_view(gdf_graph *graph, + cdef gdf_error gdf_edge_list_view(gdf_graph *graph, const gdf_column *source_indices, const gdf_column *destination_indices, const gdf_column *edge_data) cdef gdf_error gdf_add_edge_list(gdf_graph *graph) cdef gdf_error gdf_delete_edge_list(gdf_graph *graph) - cdef gdf_error gdf_adj_list_view (gdf_graph *graph, + cdef gdf_error gdf_adj_list_view (gdf_graph *graph, const gdf_column *offsets, const gdf_column *indices, const gdf_column *edge_data) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 895010d56ae..fb035ab1b32 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -132,12 +132,13 @@ class Graph: src_data = rmm.device_array_from_ptr(src_col_data, nelem=col_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(src_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(src_col_data, 0)) dest_data = rmm.device_array_from_ptr(dest_col_data, nelem=col_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(dest_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(dest_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(src_data), cudf.Series(dest_data) @@ -185,12 +186,13 @@ class Graph: offsets_data = rmm.device_array_from_ptr(offsets_col_data, nelem=col_size_off, - dtype=np.int32, - finalizer=rmm._make_finalizer(offsets_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(offsets_col_data, 0)) indices_data = rmm.device_array_from_ptr(indices_col_data, nelem=col_size_ind, - dtype=np.int32, - finalizer=rmm._make_finalizer(indices_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(indices_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(offsets_data), cudf.Series(indices_data) @@ -212,12 +214,13 @@ class Graph: offsets_data = rmm.device_array_from_ptr(offsets_col_data, nelem=off_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(offsets_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(offsets_col_data, 0)) indices_data = rmm.device_array_from_ptr(indices_col_data, nelem=ind_size, - dtype=np.int32, - finalizer=rmm._make_finalizer(indices_col_data, 0)) + dtype=np.int32) # , + # finalizer=rmm._make_finalizer(indices_col_data, 0)) + # gdf_graph g should be freed when g gets garbage collected. return cudf.Series(offsets_data), cudf.Series(indices_data) diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index b884fba3298..f28cf0084f4 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -114,7 +114,7 @@ def test_transpose_from_adj_list(graph_file): G.add_transpose() Mt = M.transpose().tocsr() toff, tind = G.view_transpose_adj_list() - assert compare_series(Mt.indices, tind) + assert compare_series(tind, Mt.indices) assert compare_offsets(toff, Mt.indptr) From 73980720f28d8ec73a95fc425b12bb67ab7bb543 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:10:26 -0800 Subject: [PATCH 18/26] fixed memory leaks in class Graph --- python/cugraph/graph/c_graph.pyx | 68 ++++++++++++++++++++++---------- 1 file changed, 48 insertions(+), 20 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index fb035ab1b32..2b7cd3da9f6 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -19,11 +19,11 @@ import cudf from librmm_cffi import librmm as rmm import numpy as np + dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64} + cdef create_column(col): - - x = < gdf_column *> malloc(sizeof(gdf_column)) cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column)) cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column) # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column) @@ -39,6 +39,14 @@ cdef create_column(col): cdef uintptr_t col_ptr = < uintptr_t > c_col return col_ptr + +cdef delete_column(col_ptr): + cdef uintptr_t col = col_ptr + cdef gdf_column * c_col = < gdf_column *> col + free(c_col) + return + + class Graph: """ cuGraph graph class containing basic graph creation and transformation operations. @@ -53,12 +61,22 @@ class Graph: >>> import cuGraph >>> G = cuGraph.Graph() """ - cdef gdf_graph * graph - graph = < gdf_graph *> calloc(1, sizeof(gdf_graph)) + print("Invoking __init__") + cdef gdf_graph * g + g = < gdf_graph *> calloc(1, sizeof(gdf_graph)) - cdef uintptr_t graph_ptr = < uintptr_t > graph + cdef uintptr_t graph_ptr = < uintptr_t > g self.graph_ptr = graph_ptr + def __del__(self): + print("Invoking __dealloc__") + cdef uintptr_t graph = self.graph_ptr + cdef gdf_graph * g = < gdf_graph *> graph + self.delete_edge_list() + self.delete_adj_list() + self.delete_transpose() + free(g) + def add_edge_list(self, source_col, dest_col, value_col=None): """ Wrap existing gdf columns representing an edge list in a gdf_graph. cuGraph @@ -100,18 +118,24 @@ class Graph: else: value = create_column(value_col) - err = gdf_edge_list_view(< gdf_graph *> graph, - < gdf_column *> source, - < gdf_column *> dest, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) - + try: + err = gdf_edge_list_view(< gdf_graph *> graph, + < gdf_column *> source, + < gdf_column *> dest, + < gdf_column *> value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + finally: + delete_column(source) + delete_column(dest) + if value is not 0: + delete_column(value) + def num_vertices(self): """ Get the number of vertices in the graph """ cdef uintptr_t graph = self.graph_ptr - cdef gdf_graph* g = graph + cdef gdf_graph* g = < gdf_graph *> graph err = gdf_add_adj_list(g) cudf.bindings.cudf_cpp.check_gdf_error(err) return g.adjList.offsets.size - 1 @@ -162,12 +186,18 @@ class Graph: value = 0 else: value = create_column(value_col) - - err = gdf_adj_list_view(< gdf_graph *> graph, - < gdf_column *> offsets, - < gdf_column *> indices, - < gdf_column *> value) - cudf.bindings.cudf_cpp.check_gdf_error(err) + + try: + err = gdf_adj_list_view(< gdf_graph *> graph, + < gdf_column *> offsets, + < gdf_column *> indices, + < gdf_column *> value) + cudf.bindings.cudf_cpp.check_gdf_error(err) + finally: + delete_column(offsets) + delete_column(indices) + if value is not 0: + delete_column(value) def view_adj_list(self): """ @@ -248,5 +278,3 @@ class Graph: cdef uintptr_t graph = self.graph_ptr err = gdf_delete_transpose(< gdf_graph *> graph) cudf.bindings.cudf_cpp.check_gdf_error(err) - - From dcb7a1f5a0b91ff5876d59a114a1e370a87638fa Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:12:40 -0800 Subject: [PATCH 19/26] modified test_grmat.py to execute a test case in pytest (this just tests R-mat graph generation successfully returns, does not really validate that the output graph is a valid R-mat graph) --- python/cugraph/grmat/test_grmat.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cugraph/grmat/test_grmat.py b/python/cugraph/grmat/test_grmat.py index 0db1ef816d5..dd458550dd3 100644 --- a/python/cugraph/grmat/test_grmat.py +++ b/python/cugraph/grmat/test_grmat.py @@ -14,5 +14,7 @@ import cugraph -vertices, edges, sources, destinations = cugraph.grmat_gen( - 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet') +def test_grmat_gen(): + vertices, edges, sources, destinations = cugraph.grmat_gen( + 'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized' + ' --quiet') From 5c89201be7bddf2c03353ffa6782b597fe9cc992 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:13:47 -0800 Subject: [PATCH 20/26] set the seed value for a random number generator to eliminate non-determinism in tests --- python/cugraph/spectral_clustering/test_balanced_cut.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 07b80ce3373..86e485ec215 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -33,6 +33,7 @@ def cugraph_call(G, partitions): def random_call(G, partitions): + random.seed(0) num_verts = G.num_vertices() assignment = [] for i in range(num_verts): From 2dc4b0a3c51d70b8f611a1768a8f984e6f375add Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:37:39 -0800 Subject: [PATCH 21/26] fixed a problem in gdf_col_delete (accessing memory after deallocation is dangerous) --- cpp/src/cugraph.cu | 13 ++++++++++++- cpp/src/tests/test_utils.h | 13 ++++++++++++- 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu index 2cb144db135..c95b347a814 100644 --- a/cpp/src/cugraph.cu +++ b/cpp/src/cugraph.cu @@ -29,9 +29,20 @@ void gdf_col_delete(gdf_column* col) { { ALLOC_FREE_TRY(col->data, nullptr); } +#if 1 +// If delete col is executed, the memory pointed by col is no longer valid and +// can be used in another memory allocation, so executing col->data = nullptr +// after delete col is dangerous, also, col = nullptr has no effect here (the +// address is passed by value, for col = nullptr should work, the input +// parameter should be gdf_column*& col (or alternatively, gdf_column** col and +// *col = nullptr also work) + col->data = nullptr; + delete col; +#else delete col; col->data = nullptr; - col = nullptr; + col = nullptr; +#endif } } diff --git a/cpp/src/tests/test_utils.h b/cpp/src/tests/test_utils.h index 881c848c45e..26ec576b3c5 100644 --- a/cpp/src/tests/test_utils.h +++ b/cpp/src/tests/test_utils.h @@ -663,8 +663,19 @@ void gdf_col_delete(gdf_column* col) { cudaStream_t stream{nullptr}; if(col->data) ALLOC_FREE_TRY(col->data, stream); +#if 1 +// If delete col is executed, the memory pointed by col is no longer valid and +// can be used in another memory allocation, so executing col->data = nullptr +// after delete col is dangerous, also, col = nullptr has no effect here (the +// address is passed by value, for col = nullptr should work, the input +// parameter should be gdf_column*& col (or alternatively, gdf_column** col and +// *col = nullptr also work) + col->data = nullptr; + delete col; +#else delete col; col->data = nullptr; - col = nullptr; + col = nullptr; +#endif } } From 91f801164137d56e75788553b7776d8bf922b744 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 00:48:36 -0800 Subject: [PATCH 22/26] additional flake8 fixes --- python/cugraph/graph/test_graph.py | 3 ++- python/setup.py | 3 +-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py index f28cf0084f4..f773b4a48e2 100755 --- a/python/cugraph/graph/test_graph.py +++ b/python/cugraph/graph/test_graph.py @@ -38,7 +38,8 @@ def compare_series(series_1, series_2): def compare_offsets(offset0, offset1): if not (len(offset0) <= len(offset1)): - print("Mismatched length: " + str(len(offset0)) + " != " + str(len(offset1))) + print("Mismatched length: " + str(len(offset0)) + " != " + + str(len(offset1))) return False for i in range(len(offset0)): if offset0[i] != offset1[i]: diff --git a/python/setup.py b/python/setup.py index 0df50e480c1..94e6358e930 100644 --- a/python/setup.py +++ b/python/setup.py @@ -122,5 +122,4 @@ def locate_nvgraph(): install_requires=INSTALL_REQUIRES, license="Apache", cmdclass=versioneer.get_cmdclass(), - zip_safe=False - ) + zip_safe=False) From b7f3c65230d560b718ab18332597b0f9cec0adbc Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 08:43:04 -0800 Subject: [PATCH 23/26] update comments on disabling free on un-owned memory --- python/cugraph/graph/c_graph.pyx | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index 2b7cd3da9f6..a74d2619062 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -162,7 +162,9 @@ class Graph: nelem=col_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(dest_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.edgeList.src_indices.data and g.edgeList.dest_indices.data are not + # owned by this instance, so should not be freed here (this will lead + # to double free, and undefined behavior). return cudf.Series(src_data), cudf.Series(dest_data) @@ -222,7 +224,9 @@ class Graph: nelem=col_size_ind, dtype=np.int32) # , # finalizer=rmm._make_finalizer(indices_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.adjList.offsets.data and g.adjList.indices.data are not owned by + # this instance, so should not be freed here (this will lead to double + # free, and undefined behavior). return cudf.Series(offsets_data), cudf.Series(indices_data) @@ -250,7 +254,9 @@ class Graph: nelem=ind_size, dtype=np.int32) # , # finalizer=rmm._make_finalizer(indices_col_data, 0)) - # gdf_graph g should be freed when g gets garbage collected. + # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data + # are not owned by this instance, so should not be freed here (this + # will lead to double free, and undefined behavior). return cudf.Series(offsets_data), cudf.Series(indices_data) From e18ece0cd1a3fc93720d501b6c30ac6512142aef Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 08:52:50 -0800 Subject: [PATCH 24/26] CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5307fb5ebb3..4000c3ea905 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,11 +22,13 @@ - PR #96 Relocated mmio.c and mmio.h (external files) to thirdparty/mmio - PR #97 Updated python tests to speed them up - PR #100 Added testing for returned vertex and edge identifiers +- PR #105 Updated ptyhton code to follow PEP8 (fixed flake8 complaints) ## Bug Fixes - PR #48 ABI Fixes - PR #72 Bug fix for segfault issue getting transpose from adjacency list +- PR #105 Bug fix for memory leaks and python test failures # cuGraph 0.5.0 (28 Jan 2019) From 0e7c12de424e35130720ee9579268cb1527d1537 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 11:26:10 -0800 Subject: [PATCH 25/26] minor fixes for merge --- CHANGELOG.md | 2 +- python/cugraph/jaccard/test_jaccard.py | 6 +++--- python/cugraph/louvain/test_louvain.py | 2 +- python/cugraph/pagerank/test_pagerank.py | 3 ++- python/cugraph/spectral_clustering/test_balanced_cut.py | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4000c3ea905..18c34e1b97e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,7 @@ - PR #96 Relocated mmio.c and mmio.h (external files) to thirdparty/mmio - PR #97 Updated python tests to speed them up - PR #100 Added testing for returned vertex and edge identifiers -- PR #105 Updated ptyhton code to follow PEP8 (fixed flake8 complaints) +- PR #105 Updated python code to follow PEP8 (fixed flake8 complaints) ## Bug Fixes diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py index b44db0a8d70..44a778937fa 100644 --- a/python/cugraph/jaccard/test_jaccard.py +++ b/python/cugraph/jaccard/test_jaccard.py @@ -58,8 +58,8 @@ def cugraph_call(M): t2 = time.time() - t1 print('Time : '+str(t2)) - return df['source'].to_array(), df['destination'].to_array(), df['jaccard_coeff'].to_array() - + return df['source'].to_array(), df['destination'].to_array(),\ + df['jaccard_coeff'].to_array() def networkx_call(M): @@ -105,7 +105,7 @@ def networkx_call(M): def test_jaccard(graph_file): M = read_mtx_file(graph_file) - cu_src, icu_dst, cu_coeff = cugraph_call(M) + cu_src, cu_dst, cu_coeff = cugraph_call(M) nx_src, nx_dst, nx_coeff = networkx_call(M) # Calculating mismatch diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py index e90ba8b13e4..0c7f0824c34 100644 --- a/python/cugraph/louvain/test_louvain.py +++ b/python/cugraph/louvain/test_louvain.py @@ -97,7 +97,7 @@ def test_louvain(graph_file): cu_map = {0: 0} for i in range(len(cu_parts)): cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i] - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) cu_mod_nx = community.modularity(cu_map, Gnx) nx_mod = community.modularity(nx_parts, Gnx) assert len(cu_parts) == len(nx_parts) diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py index 7033252e370..03527c14ec5 100755 --- a/python/cugraph/pagerank/test_pagerank.py +++ b/python/cugraph/pagerank/test_pagerank.py @@ -127,7 +127,8 @@ def test_pagerank(graph_file, max_iter, tol, alpha): err = 0 assert len(cugraph_pr) == len(networkx_pr) for i in range(len(cugraph_pr)): - if(abs(cugraph_pr[i][1]-networkx_pr[i][1]) > tol*1.1 and sorted_ptr[i][0] == networkx_pr[i][0]): + if(abs(cugraph_pr[i][1]-networkx_pr[i][1]) > tol*1.1 + and cugraph_pr[i][0] == networkx_pr[i][0]): err = err + 1 print(err) assert err < (0.01*len(cugraph_pr)) diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py index 86e485ec215..cec661b9a19 100644 --- a/python/cugraph/spectral_clustering/test_balanced_cut.py +++ b/python/cugraph/spectral_clustering/test_balanced_cut.py @@ -41,7 +41,7 @@ def random_call(G, partitions): assignment_cu = cudf.Series(assignment) score = cugraph.analyzeClustering_edge_cut(G, partitions, assignment_cu) return set(range(num_verts)), score - + DATASETS = [ '/datasets/networks/karate.mtx', From 9da6b382a787dff6bc3c948a21dbc9dfbdf76530 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Tue, 5 Mar 2019 11:49:52 -0800 Subject: [PATCH 26/26] removed prints for debugging --- python/cugraph/graph/c_graph.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx index a74d2619062..1a13948b9e1 100755 --- a/python/cugraph/graph/c_graph.pyx +++ b/python/cugraph/graph/c_graph.pyx @@ -61,7 +61,6 @@ class Graph: >>> import cuGraph >>> G = cuGraph.Graph() """ - print("Invoking __init__") cdef gdf_graph * g g = < gdf_graph *> calloc(1, sizeof(gdf_graph)) @@ -69,7 +68,6 @@ class Graph: self.graph_ptr = graph_ptr def __del__(self): - print("Invoking __dealloc__") cdef uintptr_t graph = self.graph_ptr cdef gdf_graph * g = < gdf_graph *> graph self.delete_edge_list()