diff --git a/CHANGELOG.md b/CHANGELOG.md
index 37f37ced621..900fac50ff5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -22,11 +22,13 @@
 - PR #96 Relocated mmio.c and mmio.h (external files) to thirdparty/mmio
 - PR #97 Updated python tests to speed them up
 - PR #100 Added testing for returned vertex and edge identifiers
+- PR #105 Updated python code to follow PEP8 (fixed flake8 complaints)
 
 ## Bug Fixes
 
 - PR #48 ABI Fixes
 - PR #72 Bug fix for segfault issue getting transpose from adjacency list
+- PR #105 Bug fix for memory leaks and python test failures
 - PR #110 Bug fix for segfault calling Louvain with only edge list
 
 # cuGraph 0.5.0 (28 Jan 2019)
diff --git a/cpp/include/rmm_utils.h b/cpp/include/rmm_utils.h
index b940392cf94..12b1b988fb6 100755
--- a/cpp/include/rmm_utils.h
+++ b/cpp/include/rmm_utils.h
@@ -45,7 +45,7 @@ class rmm_allocator : public thrust::device_malloc_allocator
   ~rmm_allocator() {}
 
 private:
-  cudaStream_t stream; 
+  cudaStream_t stream;
 };
 
 using rmm_temp_allocator = rmm_allocator; // Use this alias for thrust::cuda::par(allocator).on(stream)
diff --git a/cpp/include/types.h b/cpp/include/types.h
index c9e3de8ad8d..509e035b4b6 100644
--- a/cpp/include/types.h
+++ b/cpp/include/types.h
@@ -94,7 +94,7 @@ struct gdf_graph{
   gdf_dynamic *dynAdjList; //dynamic
   gdf_graph_properties *prop;
   gdf_graph() : edgeList(nullptr), adjList(nullptr), transposedAdjList(nullptr), dynAdjList(nullptr), prop(nullptr) {}
-  ~gdf_graph() { 
+  ~gdf_graph() {
     if (edgeList)
       delete edgeList;
     if (adjList)
diff --git a/cpp/src/cugraph.cu b/cpp/src/cugraph.cu
index 2602a2ffbcc..d07853112a2 100644
--- a/cpp/src/cugraph.cu
+++ b/cpp/src/cugraph.cu
@@ -29,14 +29,25 @@ void gdf_col_delete(gdf_column* col) {
     {
       ALLOC_FREE_TRY(col->data, nullptr);
     }
+#if 1
+// Once delete col is executed, the memory pointed to by col is no longer
+// valid and can be reused by another memory allocation, so executing
+// col->data = nullptr after delete col is dangerous. Also, col = nullptr has
+// no effect here (the address is passed by value); for col = nullptr to
+// work, the input parameter should be gdf_column*& col (or alternatively,
+// gdf_column** col, where *col = nullptr also works).
+    col->data = nullptr;
+    delete col;
+#else
     delete col;
     col->data = nullptr;
-    col = nullptr;
-  }
+    col = nullptr;
+#endif
+  }
 }
 
 void gdf_col_release(gdf_column* col) {
-  delete col; 
+  delete col;
 }
 
 void cpy_column_view(const gdf_column *in, gdf_column *out) {
@@ -284,8 +295,8 @@ gdf_error gdf_pagerank_impl (gdf_graph *graph,
 
 gdf_error gdf_add_adj_list(gdf_graph *graph)
 {
-  if (graph->adjList != nullptr) 
-    return GDF_SUCCESS; 
+  if (graph->adjList != nullptr)
+    return GDF_SUCCESS;
 
   GDF_REQUIRE( graph->edgeList != nullptr , GDF_INVALID_API_CALL);
   GDF_REQUIRE( graph->adjList == nullptr , GDF_INVALID_API_CALL);
@@ -304,8 +315,8 @@ gdf_error gdf_add_adj_list(gdf_graph *graph)
 
 gdf_error gdf_add_transpose(gdf_graph *graph)
 {
-  if (graph->edgeList == nullptr) 
-    gdf_add_edge_list(graph); 
+  if (graph->edgeList == nullptr)
+    gdf_add_edge_list(graph);
   if (graph->edgeList->edge_data != nullptr) {
     switch (graph->edgeList->edge_data->dtype) {
       case GDF_FLOAT32:   return gdf_add_transpose_impl(graph);
@@ -349,26 +360,26 @@ gdf_error gdf_pagerank(gdf_graph *graph, gdf_column *pagerank, float alpha, floa
 }
 
 gdf_error gdf_bfs(gdf_graph *graph, gdf_column *distances, gdf_column *predecessors, int start_node, bool directed) {
-  GDF_REQUIRE(graph->adjList != nullptr || graph->edgeList != nullptr, GDF_INVALID_API_CALL);
-  gdf_error err = gdf_add_adj_list(graph);
-  if (err != GDF_SUCCESS)
-    return err;
-  GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
-  GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
-  GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
-  GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
-
-  int n = graph->adjList->offsets->size - 1;
-  int e = graph->adjList->indices->size;
-  int* offsets_ptr = (int*)graph->adjList->offsets->data;
-  int* indices_ptr = (int*)graph->adjList->indices->data;
-  int* distances_ptr = (int*)distances->data;
-  int* predecessors_ptr = (int*)predecessors->data;
-  int alpha = 15;
-  int beta = 18;
-
-  cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta);
-  bfs.configure(distances_ptr, predecessors_ptr, nullptr);
-  bfs.traverse(start_node);
-  return GDF_SUCCESS;
+  GDF_REQUIRE(graph->adjList != nullptr || graph->edgeList != nullptr, GDF_INVALID_API_CALL);
+  gdf_error err = gdf_add_adj_list(graph);
+  if (err != GDF_SUCCESS)
+    return err;
+  GDF_REQUIRE(graph->adjList->offsets->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
+  GDF_REQUIRE(graph->adjList->indices->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
+  GDF_REQUIRE(distances->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
+  GDF_REQUIRE(predecessors->dtype == GDF_INT32, GDF_UNSUPPORTED_DTYPE);
+
+  int n = graph->adjList->offsets->size - 1;
+  int e = graph->adjList->indices->size;
+  int* offsets_ptr = (int*)graph->adjList->offsets->data;
+  int* indices_ptr = (int*)graph->adjList->indices->data;
+  int* distances_ptr = (int*)distances->data;
+  int* predecessors_ptr = (int*)predecessors->data;
+  int alpha = 15;
+  int beta = 18;
+
+  cugraph::Bfs bfs(n, e, offsets_ptr, indices_ptr, directed, alpha, beta);
+  bfs.configure(distances_ptr, predecessors_ptr, nullptr);
+  bfs.traverse(start_node);
+  return GDF_SUCCESS;
 }
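
The by-value pointer reasoning in the gdf_col_delete comment above (repeated in test_utils.h below) can be sketched in plain Python with ctypes — an illustration only, not code from this patch:

```python
import ctypes

def reset_by_value(p):
    # 'p' is a local copy of the caller's pointer; rebinding it is lost,
    # just like assigning to 'col' inside gdf_col_delete(gdf_column* col).
    p = None

def reset_by_reference(pp):
    # 'pp' points at the caller's pointer variable, so this write sticks:
    # the ctypes analogue of 'gdf_column** col; *col = nullptr;'.
    pp.contents.value = 0

buf = ctypes.c_void_p(0xdeadbeef)
reset_by_value(buf)
print(buf.value is not None)   # True: caller's pointer unchanged
reset_by_reference(ctypes.pointer(buf))
print(buf.value)               # None: caller's pointer cleared
```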
diff --git a/cpp/src/tests/test_utils.h b/cpp/src/tests/test_utils.h
index 881c848c45e..26ec576b3c5 100644
--- a/cpp/src/tests/test_utils.h
+++ b/cpp/src/tests/test_utils.h
@@ -663,8 +663,19 @@ void gdf_col_delete(gdf_column* col) {
     cudaStream_t stream{nullptr};
     if(col->data)
       ALLOC_FREE_TRY(col->data, stream);
+#if 1
+// Once delete col is executed, the memory pointed to by col is no longer
+// valid and can be reused by another memory allocation, so executing
+// col->data = nullptr after delete col is dangerous. Also, col = nullptr has
+// no effect here (the address is passed by value); for col = nullptr to
+// work, the input parameter should be gdf_column*& col (or alternatively,
+// gdf_column** col, where *col = nullptr also works).
+    col->data = nullptr;
+    delete col;
+#else
     delete col;
     col->data = nullptr;
-    col = nullptr;
+    col = nullptr;
+#endif
   }
 }
diff --git a/python/cugraph/bfs/test_bfs.py b/python/cugraph/bfs/test_bfs.py
index e75daa8b855..f52a80ce12e 100644
--- a/python/cugraph/bfs/test_bfs.py
+++ b/python/cugraph/bfs/test_bfs.py
@@ -11,41 +11,46 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
+import queue
 import time
-from scipy.io import mmread
-import pytest
+
 import numpy as np
+import pytest
+from scipy.io import mmread
 
-
-def ReadMtxFile(mmFile):
-    print('Reading ' + str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+import cudf
+import cugraph
 
 
-def cugraph_Call(M, start_vertex):
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
+
+def cugraph_call(M, start_vertex):
     # Device data
     M = M.tocsr()
     sources = cudf.Series(M.indptr)
     destinations = cudf.Series(M.indices)
     values = cudf.Series(M.data)
-    
+
     G = cugraph.Graph()
     G.add_adj_list(sources, destinations, values)
-    
+
     t1 = time.time()
     df = cugraph.bfs(G, start_vertex)
     t2 = time.time() - t1
     print('Time : '+str(t2))
-    
+
     # Return distances as np.array()
     return df['vertex'].to_array(), df['distance'].to_array()
 
 
-def base_Call(M, start_vertex):
-    intMax = 2147483647
+def base_call(M, start_vertex):
+    int_max = 2**31 - 1
+
+    M = M.tocsr()
+
     offsets = M.indptr
     indices = M.indices
     num_verts = len(offsets) - 1
@@ -53,33 +58,38 @@ def base_Call(M, start_vertex):
     vertex = list(range(num_verts))
 
     for i in range(num_verts):
-        dist[i] = intMax
-    import queue
+        dist[i] = int_max
+
     q = queue.Queue()
     q.put(start_vertex)
     dist[start_vertex] = 0
     while(not q.empty()):
         u = q.get()
-        for iCol in range(offsets[u],offsets[u + 1]):
-            v = indices[iCol]
-            if (dist[v] == intMax):
+        for i_col in range(offsets[u], offsets[u + 1]):
+            v = indices[i_col]
+            if (dist[v] == int_max):
                 dist[v] = dist[u] + 1
                 q.put(v)
+
     return vertex, dist
 
-datasets = ['/datasets/networks/dolphins.mtx',
+
+DATASETS = ['/datasets/networks/dolphins.mtx',
             '/datasets/networks/karate.mtx',
             '/datasets/networks/polbooks.mtx',
             '/datasets/golden_data/graphs/dblp.mtx']
 
-@pytest.mark.parametrize('graph_file', datasets)
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_bfs(graph_file):
+    M = read_mtx_file(graph_file)
+
+    base_vid, base_dist = base_call(M, 0)
+    cugraph_vid, cugraph_dist = cugraph_call(M, 0)
+
+    # Calculating mismatch
-    M = ReadMtxFile(graph_file)
-    base_v_id, base_dist = base_Call(M, 0)
-    v_id, dist = cugraph_Call(M, 0)
-
-    assert len(base_dist) == len(dist)
-    for i in range(len(dist)):
-        assert base_v_id[i] == v_id[i]
-        assert base_dist[i] == dist[i]
+    assert len(base_dist) == len(cugraph_dist)
+    for i in range(len(cugraph_dist)):
+        assert base_vid[i] == cugraph_vid[i]
+        assert base_dist[i] == cugraph_dist[i]
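
Distilled from base_call above: the host-side reference BFS on a tiny hardcoded CSR graph, runnable without any dataset (illustration only; the test's version reads the CSR from a Matrix Market file):

```python
import queue

# CSR of an undirected 4-cycle: edges 0-1, 1-2, 2-3, 3-0
offsets = [0, 2, 4, 6, 8]
indices = [1, 3, 0, 2, 1, 3, 0, 2]
int_max = 2**31 - 1

def host_bfs(offsets, indices, start_vertex):
    num_verts = len(offsets) - 1
    dist = [int_max] * num_verts      # "unvisited" sentinel, as in the test
    q = queue.Queue()
    q.put(start_vertex)
    dist[start_vertex] = 0
    while not q.empty():
        u = q.get()
        for i_col in range(offsets[u], offsets[u + 1]):
            v = indices[i_col]
            if dist[v] == int_max:    # first visit fixes the BFS distance
                dist[v] = dist[u] + 1
                q.put(v)
    return dist

print(host_bfs(offsets, indices, 0))  # [0, 1, 2, 1]
```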
diff --git a/python/cugraph/graph/c_graph.pxd b/python/cugraph/graph/c_graph.pxd
index 5219813498d..025761b827b 100755
--- a/python/cugraph/graph/c_graph.pxd
+++ b/python/cugraph/graph/c_graph.pxd
@@ -2,7 +2,7 @@ from libcpp cimport bool
 
 cdef extern from "cudf.h":
 
-    ctypedef enum gdf_error: 
+    ctypedef enum gdf_error:
         pass
 
     ctypedef enum gdf_dtype:
@@ -13,9 +13,9 @@ cdef extern from "cudf.h":
         GDF_INT64,
         GDF_FLOAT32,
         GDF_FLOAT64,
-        GDF_DATE32, 
-        GDF_DATE64, 
-        GDF_TIMESTAMP, 
+        GDF_DATE32,
+        GDF_DATE64,
+        GDF_TIMESTAMP,
         GDF_CATEGORY,
         GDF_STRING,
         N_GDF_TYPES
@@ -24,18 +24,18 @@ cdef extern from "cudf.h":
     ctypedef size_t gdf_size_type
 
     struct gdf_column_:
-        void *data 
+        void *data
         gdf_valid_type *valid
-        gdf_size_type size 
+        gdf_size_type size
         gdf_dtype dtype
         gdf_size_type null_count
 
     ctypedef gdf_column_ gdf_column
 
-    cdef gdf_error gdf_column_view_augmented(gdf_column *column, 
-                                             void *data, 
+    cdef gdf_error gdf_column_view_augmented(gdf_column *column,
+                                             void *data,
                                              gdf_valid_type *valid,
-                                             gdf_size_type size, 
+                                             gdf_size_type size,
                                              gdf_dtype dtype,
                                              gdf_size_type null_count)
 
@@ -60,13 +60,13 @@ cdef extern from "cugraph.h":
 
         gdf_adj_list *transposedAdjList
 
-    cdef gdf_error gdf_edge_list_view(gdf_graph *graph, 
+    cdef gdf_error gdf_edge_list_view(gdf_graph *graph,
                                       const gdf_column *source_indices,
                                       const gdf_column *destination_indices,
                                       const gdf_column *edge_data)
     cdef gdf_error gdf_add_edge_list(gdf_graph *graph)
     cdef gdf_error gdf_delete_edge_list(gdf_graph *graph)
-    cdef gdf_error gdf_adj_list_view (gdf_graph *graph, 
+    cdef gdf_error gdf_adj_list_view (gdf_graph *graph,
                                       const gdf_column *offsets,
                                       const gdf_column *indices,
                                       const gdf_column *edge_data)
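
For orientation, the gdf_column_ struct declared above can be pictured as a plain C struct; a hypothetical ctypes mirror (field types approximated — gdf_dtype is a C enum, i.e. an int; this is not part of the patch):

```python
import ctypes

class GdfColumn(ctypes.Structure):
    # Hypothetical mirror of gdf_column_ from cudf.h, for illustration.
    _fields_ = [('data', ctypes.c_void_p),
                ('valid', ctypes.POINTER(ctypes.c_ubyte)),  # gdf_valid_type*
                ('size', ctypes.c_size_t),                  # gdf_size_type
                ('dtype', ctypes.c_int),                    # gdf_dtype enum
                ('null_count', ctypes.c_size_t)]

col = GdfColumn()
print(col.size, col.null_count)  # zero-initialized: 0 0
```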
diff --git a/python/cugraph/graph/c_graph.pyx b/python/cugraph/graph/c_graph.pyx
index ff5d97b396d..1a13948b9e1 100755
--- a/python/cugraph/graph/c_graph.pyx
+++ b/python/cugraph/graph/c_graph.pyx
@@ -19,11 +19,11 @@ import cudf
 from librmm_cffi import librmm as rmm
 import numpy as np
 
+
 dtypes = {np.int32: GDF_INT32, np.int64: GDF_INT64, np.float32: GDF_FLOAT32, np.float64: GDF_FLOAT64}
 
+
 cdef create_column(col):
-
-    x = < gdf_column *> malloc(sizeof(gdf_column))
     cdef gdf_column * c_col = < gdf_column *> malloc(sizeof(gdf_column))
     cdef uintptr_t data_ptr = cudf.bindings.cudf_cpp.get_column_data_ptr(col._column)
     # cdef uintptr_t valid_ptr = cudf.bindings.cudf_cpp.get_column_valid_ptr(col._column)
@@ -39,6 +39,14 @@ cdef create_column(col):
     cdef uintptr_t col_ptr = < uintptr_t > c_col
     return col_ptr
 
+
+cdef delete_column(col_ptr):
+    cdef uintptr_t col = col_ptr
+    cdef gdf_column * c_col = < gdf_column *> col
+    free(c_col)
+    return
+
+
 class Graph:
     """
     cuGraph graph class containing basic graph creation and transformation operations.
@@ -53,12 +61,20 @@ class Graph:
         >>> import cuGraph
         >>> G = cuGraph.Graph()
         """
-        cdef gdf_graph * graph
-        graph = < gdf_graph *> calloc(1, sizeof(gdf_graph))
+        cdef gdf_graph * g
+        g = < gdf_graph *> calloc(1, sizeof(gdf_graph))
 
-        cdef uintptr_t graph_ptr = < uintptr_t > graph
+        cdef uintptr_t graph_ptr = < uintptr_t > g
         self.graph_ptr = graph_ptr
 
+    def __del__(self):
+        cdef uintptr_t graph = self.graph_ptr
+        cdef gdf_graph * g = < gdf_graph *> graph
+        self.delete_edge_list()
+        self.delete_adj_list()
+        self.delete_transpose()
+        free(g)
+
     def add_edge_list(self, source_col, dest_col, value_col=None):
         """
         Wrap existing gdf columns representing an edge list in a gdf_graph.
         cuGraph
@@ -100,18 +116,24 @@ class Graph:
         else:
             value = create_column(value_col)
 
-        err = gdf_edge_list_view(< gdf_graph *> graph,
-                                 < gdf_column *> source,
-                                 < gdf_column *> dest,
-                                 < gdf_column *> value)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
-
+        try:
+            err = gdf_edge_list_view(< gdf_graph *> graph,
+                                     < gdf_column *> source,
+                                     < gdf_column *> dest,
+                                     < gdf_column *> value)
+            cudf.bindings.cudf_cpp.check_gdf_error(err)
+        finally:
+            delete_column(source)
+            delete_column(dest)
+            if value != 0:
+                delete_column(value)
+
     def num_vertices(self):
         """
         Get the number of vertices in the graph
         """
         cdef uintptr_t graph = self.graph_ptr
-        cdef gdf_graph* g = graph
+        cdef gdf_graph* g = < gdf_graph *> graph
         err = gdf_add_adj_list(g)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
         return g.adjList.offsets.size - 1
@@ -122,7 +144,9 @@ class Graph:
         """
         cdef uintptr_t graph = self.graph_ptr
         cdef gdf_graph * g = < gdf_graph *> graph
-        gdf_add_edge_list(g)
+        err = gdf_add_edge_list(g)
+        cudf.bindings.cudf_cpp.check_gdf_error(err)
+
         col_size = g.edgeList.src_indices.size
 
         cdef uintptr_t src_col_data = < uintptr_t > g.edgeList.src_indices.data
@@ -130,12 +154,15 @@ class Graph:
 
         src_data = rmm.device_array_from_ptr(src_col_data,
                                              nelem=col_size,
-                                             dtype=np.int32,
-                                             finalizer=rmm._make_finalizer(src_col_data, 0))
+                                             dtype=np.int32)  # ,
+                                             # finalizer=rmm._make_finalizer(src_col_data, 0))
         dest_data = rmm.device_array_from_ptr(dest_col_data,
                                               nelem=col_size,
-                                              dtype=np.int32,
-                                              finalizer=rmm._make_finalizer(dest_col_data, 0))
+                                              dtype=np.int32)  # ,
+                                              # finalizer=rmm._make_finalizer(dest_col_data, 0))
+        # g.edgeList.src_indices.data and g.edgeList.dest_indices.data are not
+        # owned by this instance, so should not be freed here (this will lead
+        # to double free, and undefined behavior).
 
         return cudf.Series(src_data), cudf.Series(dest_data)
 
@@ -159,22 +186,28 @@ class Graph:
             value = 0
         else:
             value = create_column(value_col)
-
-        err = gdf_adj_list_view(< gdf_graph *> graph,
-                                < gdf_column *> offsets,
-                                < gdf_column *> indices,
-                                < gdf_column *> value)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
+
+        try:
+            err = gdf_adj_list_view(< gdf_graph *> graph,
+                                    < gdf_column *> offsets,
+                                    < gdf_column *> indices,
+                                    < gdf_column *> value)
+            cudf.bindings.cudf_cpp.check_gdf_error(err)
+        finally:
+            delete_column(offsets)
+            delete_column(indices)
+            if value != 0:
+                delete_column(value)
 
     def view_adj_list(self):
         """
         Compute the adjacency list from edge list and return offsets and indices as cudf Series.
         """
         cdef uintptr_t graph = self.graph_ptr
-        err = gdf_add_adj_list(< gdf_graph *> graph)
-        cudf.bindings.cudf_cpp.check_gdf_error(err)
-        cdef gdf_graph * g = < gdf_graph *> graph
+        cdef gdf_graph * g = < gdf_graph *> graph
+        err = gdf_add_adj_list(g)
+        cudf.bindings.cudf_cpp.check_gdf_error(err)
+
         col_size_off = g.adjList.offsets.size
         col_size_ind = g.adjList.indices.size
 
@@ -183,12 +216,15 @@ class Graph:
 
         offsets_data = rmm.device_array_from_ptr(offsets_col_data,
                                                  nelem=col_size_off,
-                                                 dtype=np.int32,
-                                                 finalizer=rmm._make_finalizer(offsets_col_data, 0))
+                                                 dtype=np.int32)  # ,
+                                                 # finalizer=rmm._make_finalizer(offsets_col_data, 0))
         indices_data = rmm.device_array_from_ptr(indices_col_data,
                                                  nelem=col_size_ind,
-                                                 dtype=np.int32,
-                                                 finalizer=rmm._make_finalizer(indices_col_data, 0))
+                                                 dtype=np.int32)  # ,
+                                                 # finalizer=rmm._make_finalizer(indices_col_data, 0))
+        # g.adjList.offsets.data and g.adjList.indices.data are not owned by
+        # this instance, so should not be freed here (this will lead to double
+        # free, and undefined behavior).
 
         return cudf.Series(offsets_data), cudf.Series(indices_data)
 
@@ -210,12 +246,15 @@ class Graph:
 
         offsets_data = rmm.device_array_from_ptr(offsets_col_data,
                                                  nelem=off_size,
-                                                 dtype=np.int32,
-                                                 finalizer=rmm._make_finalizer(offsets_col_data, 0))
+                                                 dtype=np.int32)  # ,
+                                                 # finalizer=rmm._make_finalizer(offsets_col_data, 0))
         indices_data = rmm.device_array_from_ptr(indices_col_data,
                                                  nelem=ind_size,
-                                                 dtype=np.int32,
-                                                 finalizer=rmm._make_finalizer(indices_col_data, 0))
+                                                 dtype=np.int32)  # ,
+                                                 # finalizer=rmm._make_finalizer(indices_col_data, 0))
+        # g.transposedAdjList.offsets.data and g.transposedAdjList.indices.data
+        # are not owned by this instance, so should not be freed here (this
+        # will lead to double free, and undefined behavior).
 
         return cudf.Series(offsets_data), cudf.Series(indices_data)
 
@@ -243,5 +282,3 @@ class Graph:
         cdef uintptr_t graph = self.graph_ptr
         err = gdf_delete_transpose(< gdf_graph *> graph)
         cudf.bindings.cudf_cpp.check_gdf_error(err)
-
-
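
The pattern behind the hunks above is worth naming: device_array_from_ptr now wraps memory the C++ gdf_graph still owns, so no finalizer is attached — a finalizer on borrowed memory frees it a second time when the Python view is collected. A sketch of the two ownership cases, assuming only the librmm calls already used in this file (rmm.device_array_from_ptr, rmm._make_finalizer); this is an illustration, not code from the patch:

```python
import numpy as np
from librmm_cffi import librmm as rmm

def wrap_borrowed(ptr, n):
    # Memory owned elsewhere (e.g. by the C++ gdf_graph): wrap only.
    # No finalizer, so garbage-collecting the view never frees the buffer.
    return rmm.device_array_from_ptr(ptr, nelem=n, dtype=np.int32)

def wrap_owned(ptr, n):
    # Memory whose ownership is transferred to Python: attach a finalizer
    # so the buffer is released exactly once, when the view is collected.
    return rmm.device_array_from_ptr(ptr, nelem=n, dtype=np.int32,
                                     finalizer=rmm._make_finalizer(ptr, 0))
```

add_edge_list and add_adj_list apply the same split the other way around: the temporary gdf_column structs are owned here and are paired with delete_column in a finally block, while the device buffers they point at are not freed.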
diff --git a/python/cugraph/graph/test_graph.py b/python/cugraph/graph/test_graph.py
index 7b9097e76c2..f773b4a48e2 100755
--- a/python/cugraph/graph/test_graph.py
+++ b/python/cugraph/graph/test_graph.py
@@ -11,15 +11,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import pytest
 import numpy as np
+import pytest
 from scipy.io import mmread
 
-def ReadMtxFile(mmFile):
-    print('Reading '+ str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+import cugraph
+import cudf
+
+
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
 
 def compare_series(series_1, series_2):
     if (len(series_1) != len(series_2)):
@@ -27,56 +30,63 @@ def compare_series(series_1, series_2):
         return 0
     for i in range(len(series_1)):
         if(series_1[i] != series_2[i]):
-            print("Series[" + str(i) + "] does not match, " + str(series_1[i]) + ", " + str(series_2[i]))
+            print("Series[" + str(i) + "] does not match, " + str(series_1[i])
+                  + ", " + str(series_2[i]))
             return 0
     return 1
 
-def compareOffsets(cu, np):
-    if not (len(cu) <= len(np)):
-        print("Mismatched length: " + str(len(cu)) + " != " + str(len(np)))
+
+def compare_offsets(offset0, offset1):
+    if not (len(offset0) <= len(offset1)):
+        print("Mismatched length: " + str(len(offset0)) + " != "
+              + str(len(offset1)))
         return False
-    for i in range(len(cu)):
-        if cu[i] != np[i]:
-            print("Series[" + str(i) + "]: " + str(cu[i]) + " != " + str(np[i]))
+    for i in range(len(offset0)):
+        if offset0[i] != offset1[i]:
+            print("Series[" + str(i) + "]: " + str(offset0[i]) + " != "
+                  + str(offset1[i]))
             return False
     return True
 
-datasets = ['/datasets/networks/karate.mtx',
-            '/datasets/networks/dolphins.mtx', 
+
+DATASETS = ['/datasets/networks/karate.mtx',
+            '/datasets/networks/dolphins.mtx',
             '/datasets/networks/netscience.mtx']
 
-@pytest.mark.parametrize('graph_file', datasets)
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_add_edge_list_to_adj_list(graph_file):
-    M = ReadMtxFile(graph_file)
+    M = read_mtx_file(graph_file)
     sources = cudf.Series(M.row)
     destinations = cudf.Series(M.col)
 
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
-        
+
     offsets_exp = M.indptr
     indices_exp = M.indices
 
     # cugraph add_edge_list to_adj_list call
     G = cugraph.Graph()
-    G.add_edge_list(sources,destinations, None)
+    G.add_edge_list(sources, destinations, None)
     offsets_cu, indices_cu = G.view_adj_list()
-    assert compareOffsets(offsets_cu, offsets_exp)
+    assert compare_offsets(offsets_cu, offsets_exp)
     assert compare_series(indices_cu, indices_exp)
 
-@pytest.mark.parametrize('graph_file', datasets)
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_add_adj_list_to_edge_list(graph_file):
-    M = ReadMtxFile(graph_file)
+    M = read_mtx_file(graph_file)
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
-        
+
     offsets = cudf.Series(M.indptr)
     indices = cudf.Series(M.indices)
 
@@ -92,10 +102,11 @@ def test_add_adj_list_to_edge_list(graph_file):
     destinations_cu = np.array(destinations)
     assert compare_series(sources_cu, sources_exp)
     assert compare_series(destinations_cu, destinations_exp)
-    
-@pytest.mark.parametrize('graph_file', datasets)
-def test_transpose_from_adj_list(graph_file):    
-    M = ReadMtxFile(graph_file)
+
+
+@pytest.mark.parametrize('graph_file', DATASETS)
+def test_transpose_from_adj_list(graph_file):
+    M = read_mtx_file(graph_file)
     M = M.tocsr()
     offsets = cudf.Series(M.indptr)
     indices = cudf.Series(M.indices)
@@ -104,12 +115,13 @@ def test_transpose_from_adj_list(graph_file):
     G.add_transpose()
     Mt = M.transpose().tocsr()
     toff, tind = G.view_transpose_adj_list()
-    assert compare_series(Mt.indices, tind)
-    assert compareOffsets(toff, Mt.indptr)
-    
-@pytest.mark.parametrize('graph_file', datasets)
+    assert compare_series(tind, Mt.indices)
+    assert compare_offsets(toff, Mt.indptr)
+
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_view_edge_list_from_adj_list(graph_file):
-    M = ReadMtxFile(graph_file)
+    M = read_mtx_file(graph_file)
     M = M.tocsr()
     offsets = cudf.Series(M.indptr)
     indices = cudf.Series(M.indices)
@@ -121,19 +133,20 @@ def test_view_edge_list_from_adj_list(graph_file):
     dst1 = M.col
     assert compare_series(src1, src2)
     assert compare_series(dst1, dst2)
-    
-@pytest.mark.parametrize('graph_file', datasets)
+
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_delete_edge_list_delete_adj_list(graph_file):
-    M = ReadMtxFile(graph_file)
+    M = read_mtx_file(graph_file)
     sources = cudf.Series(M.row)
     destinations = cudf.Series(M.col)
 
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
-        
+
     offsets = cudf.Series(M.indptr)
     indices = cudf.Series(M.indices)
 
@@ -150,5 +163,3 @@ def test_delete_edge_list_delete_adj_list(graph_file):
     with pytest.raises(cudf.bindings.GDFError.GDFError) as excinfo:
         G.view_edge_list()
     assert excinfo.value.errcode.decode() == 'GDF_INVALID_API_CALL'
-
-
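
The element-by-element loops in compare_series/compare_offsets are easy to cross-check with numpy; an equivalent verdict, assuming both inputs convert cleanly to host arrays (illustration only, not part of the patch):

```python
import numpy as np

def compare_series_np(series_1, series_2):
    # Same pass/fail result as compare_series above, minus the per-index
    # diagnostics printed by the test helper.
    a = np.asarray(series_1)
    b = np.asarray(series_2)
    return len(a) == len(b) and bool(np.array_equal(a, b))

print(compare_series_np([0, 1, 2], [0, 1, 2]))  # True
print(compare_series_np([0, 1, 2], [0, 9, 2]))  # False
```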
diff --git a/python/cugraph/grmat/test_grmat.py b/python/cugraph/grmat/test_grmat.py
index 3d3933b6191..dd458550dd3 100644
--- a/python/cugraph/grmat/test_grmat.py
+++ b/python/cugraph/grmat/test_grmat.py
@@ -12,5 +12,9 @@
 # limitations under the License.
 
 import cugraph
 
-vertices, edges, sources, destinations = cugraph.grmat_gen('grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized --quiet')
+
+def test_grmat_gen():
+    vertices, edges, sources, destinations = cugraph.grmat_gen(
+        'grmat --rmat_scale=2 --rmat_edgefactor=2 --device=0 --normalized'
+        ' --quiet')
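
The move above matters because pytest imports test modules during collection; anything at module scope runs before any test is selected. A minimal illustration with a hypothetical module (not from the patch):

```python
# test_example.py (hypothetical): pytest imports this module first, so code
# at module scope executes at collection time, even for deselected runs.
print('runs at collection time')  # import-time side effect: avoid

def test_runs_only_when_selected():
    # The body executes only when pytest actually runs the test, so
    # expensive or device-touching work belongs here, not at module scope.
    assert 1 + 1 == 2
```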
diff --git a/python/cugraph/jaccard/test_jaccard.py b/python/cugraph/jaccard/test_jaccard.py
index 690a767245b..44a778937fa 100644
--- a/python/cugraph/jaccard/test_jaccard.py
+++ b/python/cugraph/jaccard/test_jaccard.py
@@ -11,47 +11,58 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import numpy as np
-import sys
 import time
-from scipy.io import mmread
-import networkx as nx
-import os
+
 import pytest
+from scipy.io import mmread
+
+import cudf
+import cugraph
+
+# Temporarily suppress warnings till networkX fixes deprecation warnings
+# (Using or importing the ABCs from 'collections' instead of from
+# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
+# python 3.7. Also, this import networkx needs to be relocated in the
+# third-party group once this gets fixed.
+import warnings
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+    import networkx as nx
+
+
+print('Networkx version : {} '.format(nx.__version__))
 
-print ('Networkx version : {} '.format(nx.__version__))
 
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
 
-def ReadMtxFile(mmFile):
-    print('Reading '+ str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
-
 
-def cuGraph_Call(M):
+def cugraph_call(M):
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
 
-    #Device data
+    # Device data
     row_offsets = cudf.Series(M.indptr)
     col_indices = cudf.Series(M.indices)
-    
+
     G = cugraph.Graph()
-    G.add_adj_list(row_offsets,col_indices,None)
+    G.add_adj_list(row_offsets, col_indices, None)
 
     # cugraph Jaccard Call
     t1 = time.time()
     df = cugraph.nvJaccard(G)
-    t2 = time.time() - t1 
+    t2 = time.time() - t1
     print('Time : '+str(t2))
 
-    return df['source'].to_array(), df['destination'].to_array(), df['jaccard_coeff'].to_array()
+    return df['source'].to_array(), df['destination'].to_array(),\
+        df['jaccard_coeff'].to_array()
+
 
-def networkx_Call(M):
+def networkx_call(M):
     M = M.tocsr()
     M = M.tocoo()
@@ -59,8 +70,9 @@ def networkx_Call(M):
     destinations = M.col
     edges = []
     for i in range(len(sources)):
-        edges.append((sources[i],destinations[i]))
-    # in NVGRAPH tests we read as CSR and feed as CSC, so here we doing this explicitly
+        edges.append((sources[i], destinations[i]))
+    # in NVGRAPH tests we read as CSR and feed as CSC, so here we're doing
+    # this explicitly
     print('Format conversion ... ')
 
     # Directed NetworkX graph
@@ -71,39 +83,40 @@ def networkx_Call(M):
     print('Solving... ')
     t1 = time.time()
     preds = nx.jaccard_coefficient(Gnx, edges)
-    t2 = time.time() - t1 
+    t2 = time.time() - t1
     print('Time : '+str(t2))
 
-    coeff = []
     src = []
     dst = []
-    for u,v,p in preds:
+    coeff = []
+    for u, v, p in preds:
         src.append(u)
         dst.append(v)
         coeff.append(p)
     return src, dst, coeff
-
 
+
+DATASETS = ['/datasets/networks/dolphins.mtx',
+            '/datasets/networks/karate.mtx',
-datasets = ['/datasets/networks/dolphins.mtx',
-            '/datasets/networks/karate.mtx' ,
             '/datasets/networks/netscience.mtx']
 
-@pytest.mark.parametrize('graph_file', datasets)
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_jaccard(graph_file):
-    M = ReadMtxFile(graph_file)
-    cu_src, cu_dst, cu_coeff = cuGraph_Call(M)
-    nx_src, nx_dst, nx_coeff = networkx_Call(M)
+    M = read_mtx_file(graph_file)
+    cu_src, cu_dst, cu_coeff = cugraph_call(M)
+    nx_src, nx_dst, nx_coeff = networkx_call(M)
+
     # Calculating mismatch
     err = 0
     tol = 1.0e-06
 
+    assert len(cu_coeff) == len(nx_coeff)
     for i in range(len(cu_coeff)):
-        if(abs(cu_coeff[i] -nx_coeff[i])>tol*1.1 and cu_src == nx_src and cu_dst == nx_dst):
-            err+=1
-    print("Mismatches: %d" %err)
-    assert err == 0
+        if(abs(cu_coeff[i] - nx_coeff[i]) > tol*1.1 and cu_src == nx_src
+           and cu_dst == nx_dst):
+            err += 1
+    print("Mismatches: %d" % err)
+    assert err == 0
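
For reference, the quantity both sides compute above: the Jaccard coefficient of a pair (u, v) is |N(u) ∩ N(v)| / |N(u) ∪ N(v)|, the overlap of the two neighborhoods. A tiny host-side version (illustration only):

```python
def jaccard(neighbors, u, v):
    # neighbors: dict mapping each vertex to the set of adjacent vertices
    nu, nv = neighbors[u], neighbors[v]
    union = nu | nv
    return len(nu & nv) / len(union) if union else 0.0

neighbors = {0: {1, 2}, 1: {0, 2}, 2: {0, 1, 3}, 3: {2}}
print(jaccard(neighbors, 0, 1))  # |{2}| / |{0, 1, 2}| = 1/3
```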
diff --git a/python/cugraph/jaccard/test_wjaccard.py b/python/cugraph/jaccard/test_wjaccard.py
index bda9000863d..9a4315893d5 100644
--- a/python/cugraph/jaccard/test_wjaccard.py
+++ b/python/cugraph/jaccard/test_wjaccard.py
@@ -11,61 +11,62 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import numpy as np
-import sys
 import time
-from scipy.io import mmread
+
 import networkx as nx
-import os
+import numpy as np
 import pytest
+from scipy.io import mmread
 
-print ('Networkx version : {} '.format(nx.__version__))
+import cudf
+import cugraph
 
-def ReadMtxFile(mmFile):
-    print('Reading '+ str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
-
+print('Networkx version : {} '.format(nx.__version__))
 
-def cuGraph_Callw(M):
+
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
+
+def cugraph_call(M):
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
 
-    #Device data
+    # Device data
     row_offsets = cudf.Series(M.indptr)
     col_indices = cudf.Series(M.indices)
-    #values = cudf.Series(np.ones(len(col_indices), dtype = np.float32), nan_as_null = False)
-    weights_arr = cudf.Series(np.ones(len(row_offsets), dtype = np.float32), nan_as_null = False)
-    
+    # values = cudf.Series(np.ones(len(col_indices), dtype=np.float32),
+    #                      nan_as_null=False)
+    weights_arr = cudf.Series(np.ones(len(row_offsets), dtype=np.float32),
+                              nan_as_null=False)
+
     G = cugraph.Graph()
-    G.add_adj_list(row_offsets,col_indices,None)
+    G.add_adj_list(row_offsets, col_indices, None)
 
     # cugraph Jaccard Call
     t1 = time.time()
     df = cugraph.nvJaccard_w(G, weights_arr)
-    t2 = time.time() - t1 
+    t2 = time.time() - t1
     print('Time : '+str(t2))
 
     return df['jaccard_coeff']
-
 
-datasets = ['/datasets/networks/dolphins.mtx', '/datasets/networks/karate.mtx', '/datasets/golden_data/graphs/dblp.mtx']
 
+DATASETS = ['/datasets/networks/dolphins.mtx',
+            '/datasets/networks/karate.mtx',
+            '/datasets/golden_data/graphs/dblp.mtx']
 
-@pytest.mark.parametrize('graph_file', datasets)
+
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_wjaccard(graph_file):
-    M = ReadMtxFile(graph_file)
-    cu_coeff = cuGraph_Callw(M)
-
-    # no NetworkX equivalent to compare against...
-
-
-
-
+    M = read_mtx_file(graph_file)
+    # suppress F841 (local variable is assigned but never used) in flake8
+    # no networkX equivalent to compare cu_coeff against...
+    cu_coeff = cugraph_call(M)  # noqa: F841
+    # this test is incomplete...
diff --git a/python/cugraph/louvain/test_louvain.py b/python/cugraph/louvain/test_louvain.py
index 30262c38a91..0c7f0824c34 100644
--- a/python/cugraph/louvain/test_louvain.py
+++ b/python/cugraph/louvain/test_louvain.py
@@ -11,82 +11,93 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import numpy as np
-import sys
 import time
-from scipy.io import mmread
-import networkx as nx
-import community
-import os
+
 import pytest
+from scipy.io import mmread
+
+import cudf
+import cugraph
+
+# Temporarily suppress warnings till networkX fixes deprecation warnings
+# (Using or importing the ABCs from 'collections' instead of from
+# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
+# python 3.7. Also, these import community and import networkx need to be
+# relocated in the third-party group once this gets fixed.
+import warnings
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+    import community
+    import networkx as nx
+
+
+print('Networkx version : {} '.format(nx.__version__))
 
-print ('Networkx version : {} '.format(nx.__version__))
 
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
 
-def ReadMtxFile(mmFile):
-    print('Reading '+ str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
-
 
-def cuGraph_Call(M):
+def cugraph_call(M):
     M = M.tocsr()
-    if M is None :
+    if M is None:
         raise TypeError('Could not read the input graph')
     if M.shape[0] != M.shape[1]:
         raise TypeError('Shape is not square')
 
-    #Device data
+    # Device data
     row_offsets = cudf.Series(M.indptr)
     col_indices = cudf.Series(M.indices)
     values = cudf.Series(M.data)
 
     G = cugraph.Graph()
-    G.add_adj_list(row_offsets, col_indices, values) 
+    G.add_adj_list(row_offsets, col_indices, values)
 
     # cugraph Louvain Call
     t1 = time.time()
     parts, mod = cugraph.nvLouvain(G)
-    t2 = time.time() - t1 
+    t2 = time.time() - t1
     print('Time : '+str(t2))
 
     return parts, mod
 
-def networkx_Call(M):
+
+def networkx_call(M):
     M = M.tocsr()
 
     # Directed NetworkX graph
     Gnx = nx.Graph(M)
-    #z = {k: 1.0/M.shape[0] for k in range(M.shape[0])}
+    # z = {k: 1.0/M.shape[0] for k in range(M.shape[0])}
 
     # Networkx Jaccard Call
     print('Solving... ')
     t1 = time.time()
     parts = community.best_partition(Gnx)
-    t2 = time.time() - t1 
+    t2 = time.time() - t1
 
     print('Time : '+str(t2))
 
     return parts
-
 
+
-datasets = ['/datasets/networks/karate.mtx',
-            '/datasets/networks/dolphins.mtx', 
+DATASETS = ['/datasets/networks/karate.mtx',
+            '/datasets/networks/dolphins.mtx',
             '/datasets/networks/netscience.mtx']
 
-@pytest.mark.parametrize('graph_file', datasets)
+@pytest.mark.parametrize('graph_file', DATASETS)
 def test_louvain(graph_file):
-    M = ReadMtxFile(graph_file)
-    cu_parts, cu_mod = cuGraph_Call(M)
-    nx_parts = networkx_Call(M)
+    M = read_mtx_file(graph_file)
+    cu_parts, cu_mod = cugraph_call(M)
+    nx_parts = networkx_call(M)
+
     # Calculating modularity scores for comparison
     Gnx = nx.Graph(M)
-    cu_map = {0:0}
+    cu_map = {0: 0}
     for i in range(len(cu_parts)):
         cu_map[cu_parts['vertex'][i]] = cu_parts['partition'][i]
-    assert set(nx_parts.keys()) == set(cu_map.keys()) 
+    assert set(nx_parts.keys()) == set(cu_map.keys())
     cu_mod_nx = community.modularity(cu_map, Gnx)
     nx_mod = community.modularity(nx_parts, Gnx)
     assert len(cu_parts) == len(nx_parts)
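
The catch_warnings block above (also used in test_jaccard.py, test_pagerank.py, and test_sssp.py) scopes the filter to the import alone; a self-contained illustration of that scoping:

```python
import warnings

with warnings.catch_warnings():
    # The filter applies only inside this block, so DeprecationWarnings
    # raised elsewhere in the test run still surface normally.
    warnings.filterwarnings("ignore", category=DeprecationWarning)
    warnings.warn("silenced", DeprecationWarning)   # stand-in for the import

warnings.warn("visible again", DeprecationWarning)  # not suppressed here
```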
diff --git a/python/cugraph/pagerank/test_pagerank.py b/python/cugraph/pagerank/test_pagerank.py
index eaf6fdaa73f..03527c14ec5 100755
--- a/python/cugraph/pagerank/test_pagerank.py
+++ b/python/cugraph/pagerank/test_pagerank.py
@@ -11,28 +11,39 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
 import time
-from scipy.io import mmread
-import networkx as nx
+
 import pytest
+from scipy.io import mmread
 
-print('Networkx version : {} '.format(nx.__version__))
+import cudf
+import cugraph
 
+# Temporarily suppress warnings till networkX fixes deprecation warnings
+# (Using or importing the ABCs from 'collections' instead of from
+# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
+# python 3.7. Also, this import networkx needs to be relocated in the
+# third-party group once this gets fixed.
+import warnings
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+    import networkx as nx
 
-def ReadMtxFile(mmFile):
-    print('Reading ' + str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+
+print('Networkx version : {} '.format(nx.__version__))
 
 
-def cugraph_Call(M, max_iter, tol, alpha):
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
 
+def cugraph_call(M, max_iter, tol, alpha):
     # Device data
     sources = cudf.Series(M.row)
     destinations = cudf.Series(M.col)
     # values = cudf.Series(np.ones(len(sources), dtype = np.float64))
-    
+
     # cugraph Pagerank Call
     G = cugraph.Graph()
     G.add_edge_list(sources, destinations, None)
@@ -50,8 +61,7 @@ def cugraph_Call(M, max_iter, tol, alpha):
 
     return sorted(sorted_pr, key=lambda x: x[1], reverse=True)
 
 
-def networkx_Call(M, max_iter, tol, alpha):
-
+def networkx_call(M, max_iter, tol, alpha):
     nnz_per_row = {r: 0 for r in range(M.get_shape()[0])}
     for nnz in range(M.getnnz()):
         nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]]
@@ -93,29 +103,32 @@ def networkx_Call(M, max_iter, tol, alpha):
 
     return sorted(pr.items(), key=lambda x: x[1], reverse=True)
 
 
-datasets = ['/datasets/networks/dolphins.mtx',
-            '/datasets/networks/karate.mtx' ,
+DATASETS = ['/datasets/networks/dolphins.mtx',
+            '/datasets/networks/karate.mtx',
             '/datasets/networks/netscience.mtx']
 
-Max_Iterations = [500]
-tolerance = [1.0e-06]
-alpha = [0.85]
+MAX_ITERATIONS = [500]
+TOLERANCE = [1.0e-06]
+ALPHA = [0.85]
 
 
-@pytest.mark.parametrize('graph_file', datasets)
-@pytest.mark.parametrize('max_iter', Max_Iterations)
-@pytest.mark.parametrize('tol', tolerance)
-@pytest.mark.parametrize('alpha', alpha)
+@pytest.mark.parametrize('graph_file', DATASETS)
+@pytest.mark.parametrize('max_iter', MAX_ITERATIONS)
+@pytest.mark.parametrize('tol', TOLERANCE)
+@pytest.mark.parametrize('alpha', ALPHA)
 def test_pagerank(graph_file, max_iter, tol, alpha):
+    M = read_mtx_file(graph_file)
+
+    networkx_pr = networkx_call(M, max_iter, tol, alpha)
+    cugraph_pr = cugraph_call(M, max_iter, tol, alpha)
 
-    M = ReadMtxFile(graph_file)
-    sorted_pr = cugraph_Call(M, max_iter, tol, alpha)
-    items = networkx_Call(M, max_iter, tol, alpha)
     # Calculating mismatch
+
     err = 0
-    # assert len(sorted_pr) == len(items)
-    for i in range(len(sorted_pr)):
-        if(abs(sorted_pr[i][1]-items[i][1]) > tol*1.1 and sorted_pr[i][0] == items[i][0]):
+    assert len(cugraph_pr) == len(networkx_pr)
+    for i in range(len(cugraph_pr)):
+        if(abs(cugraph_pr[i][1]-networkx_pr[i][1]) > tol*1.1
+           and cugraph_pr[i][0] == networkx_pr[i][0]):
             err = err + 1
     print(err)
-    assert err < (0.01*len(sorted_pr))
+    assert err < (0.01*len(cugraph_pr))
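
As a reminder of what both libraries compute with alpha, tol, and max_iter: damped power iteration. A dense toy version, illustration only (real implementations iterate over sparse structures):

```python
def pagerank_dense(adj, alpha=0.85, tol=1.0e-06, max_iter=500):
    # adj[u] lists the out-neighbors of u; dangling vertices spread their
    # mass uniformly, a common (not the only) convention.
    n = len(adj)
    pr = [1.0 / n] * n
    for _ in range(max_iter):
        nxt = [(1.0 - alpha) / n] * n     # teleport term
        for u, outs in enumerate(adj):
            share = alpha * pr[u] / len(outs) if outs else alpha * pr[u] / n
            for v in (outs if outs else range(n)):
                nxt[v] += share
        if sum(abs(a - b) for a, b in zip(nxt, pr)) < tol:
            return nxt                    # converged within tolerance
        pr = nxt
    return pr

print(pagerank_dense([[1], [2], [0]]))  # 3-cycle: uniform, ~[1/3, 1/3, 1/3]
```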
diff --git a/python/cugraph/spectral_clustering/test_balanced_cut.py b/python/cugraph/spectral_clustering/test_balanced_cut.py
index 209b4c33bd7..cec661b9a19 100644
--- a/python/cugraph/spectral_clustering/test_balanced_cut.py
+++ b/python/cugraph/spectral_clustering/test_balanced_cut.py
@@ -11,53 +11,60 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import numpy as np
-import sys
-import time
-from scipy.io import mmread
-import community
-import os
-import pytest
 import random
 
-def ReadMtxFile(mmFile):
-    print('Reading ' + str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+import pytest
+from scipy.io import mmread
+
+import cudf
+import cugraph
+
+
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
 
-
-def cuGraph_Call(G, partitions):
-    df = cugraph.spectralBalancedCutClustering(G, partitions, num_eigen_vects=partitions)
+def cugraph_call(G, partitions):
+    df = cugraph.spectralBalancedCutClustering(G, partitions,
+                                               num_eigen_vects=partitions)
     score = cugraph.analyzeClustering_edge_cut(G, partitions, df['cluster'])
     return set(df['vertex'].to_array()), score
 
-def random_Call(G, partitions):
+
+def random_call(G, partitions):
+    random.seed(0)
     num_verts = G.num_vertices()
     assignment = []
     for i in range(num_verts):
-        assignment.append(random.randint(0,partitions-1))
+        assignment.append(random.randint(0, partitions-1))
     assignment_cu = cudf.Series(assignment)
     score = cugraph.analyzeClustering_edge_cut(G, partitions, assignment_cu)
     return set(range(num_verts)), score
-
 
-datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx']
-partitions = [2, 4, 8]
 
-@pytest.mark.parametrize('graph_file', datasets)
-@pytest.mark.parametrize('partitions', partitions)
-def test_modularityClustering(graph_file, partitions):
+DATASETS = [
+    '/datasets/networks/karate.mtx',
+    '/datasets/networks/dolphins.mtx',
+    '/datasets/golden_data/graphs/dblp.mtx']
+PARTITIONS = [2, 4, 8]
+
+
+@pytest.mark.parametrize('graph_file', DATASETS)
+@pytest.mark.parametrize('partitions', PARTITIONS)
+def test_modularity_clustering(graph_file, partitions):
     # Read in the graph and get a cugraph object
-    M = ReadMtxFile(graph_file).tocsr()
+    M = read_mtx_file(graph_file).tocsr()
     row_offsets = cudf.Series(M.indptr)
     col_indices = cudf.Series(M.indices)
     values = cudf.Series(M.data)
 
     G = cugraph.Graph()
     G.add_adj_list(row_offsets, col_indices, values)
-    
+
     # Get the modularity score for partitioning versus random assignment
-    cu_v_id, cu_score = cuGraph_Call(G, partitions)
-    v_id, rand_score = random_Call(G, partitions)
-    assert cu_v_id == v_id
-    # Assert that the partitioning has better modularity than the random assignment
+    cu_vid, cu_score = cugraph_call(G, partitions)
+    rand_vid, rand_score = random_call(G, partitions)
+
+    # Assert that the partitioning has better modularity than the random
+    # assignment
     assert cu_score < rand_score
diff --git a/python/cugraph/spectral_clustering/test_modularity.py b/python/cugraph/spectral_clustering/test_modularity.py
index 0238be2a194..5421af7640e 100644
--- a/python/cugraph/spectral_clustering/test_modularity.py
+++ b/python/cugraph/spectral_clustering/test_modularity.py
@@ -11,53 +11,59 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
-import numpy as np
-import sys
-import time
-from scipy.io import mmread
-import community
-import os
-import pytest
 import random
 
-def ReadMtxFile(mmFile):
-    print('Reading ' + str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+import pytest
+from scipy.io import mmread
+
+import cudf
+import cugraph
+
+
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
+
 
-
-def cuGraph_Call(G, partitions):
-    df = cugraph.spectralModularityMaximizationClustering(G, partitions, num_eigen_vects=(partitions - 1))
+def cugraph_call(G, partitions):
+    df = cugraph.spectralModularityMaximizationClustering(
+        G, partitions, num_eigen_vects=(partitions - 1))
     score = cugraph.analyzeClustering_modularity(G, partitions, df['cluster'])
     return score
 
-def random_Call(G, partitions):
+
+def random_call(G, partitions):
     num_verts = G.num_vertices()
     assignment = []
     for i in range(num_verts):
-        assignment.append(random.randint(0,partitions-1))
+        assignment.append(random.randint(0, partitions-1))
     assignment_cu = cudf.Series(assignment)
     score = cugraph.analyzeClustering_modularity(G, partitions, assignment_cu)
     return score
-
 
-datasets = ['/datasets/networks/karate.mtx', '/datasets/networks/dolphins.mtx', '/datasets/golden_data/graphs/dblp.mtx']
-partitions = [2, 4, 8]
 
-@pytest.mark.parametrize('graph_file', datasets)
-@pytest.mark.parametrize('partitions', partitions)
-def test_modularityClustering(graph_file, partitions):
+DATASETS = [
+    '/datasets/networks/karate.mtx',
+    '/datasets/networks/dolphins.mtx',
+    '/datasets/golden_data/graphs/dblp.mtx']
+PARTITIONS = [2, 4, 8]
+
+
+@pytest.mark.parametrize('graph_file', DATASETS)
+@pytest.mark.parametrize('partitions', PARTITIONS)
+def test_modularity_clustering(graph_file, partitions):
     # Read in the graph and get a cugraph object
-    M = ReadMtxFile(graph_file).tocsr()
+    M = read_mtx_file(graph_file).tocsr()
     row_offsets = cudf.Series(M.indptr)
     col_indices = cudf.Series(M.indices)
     values = cudf.Series(M.data)
 
     G = cugraph.Graph()
     G.add_adj_list(row_offsets, col_indices, values)
-    
+
     # Get the modularity score for partitioning versus random assignment
-    cu_score = cuGraph_Call(G, partitions)
-    rand_score = random_Call(G, partitions)
-
-    # Assert that the partitioning has better modularity than the random assignment
+    cu_score = cugraph_call(G, partitions)
+    rand_score = random_call(G, partitions)
+
+    # Assert that the partitioning has better modularity than the random
+    # assignment
     assert cu_score > rand_score
diff --git a/python/cugraph/sssp/test_sssp.py b/python/cugraph/sssp/test_sssp.py
index 6c3cb279804..cc638597812 100644
--- a/python/cugraph/sssp/test_sssp.py
+++ b/python/cugraph/sssp/test_sssp.py
@@ -11,23 +11,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cugraph
-import cudf
 import time
-from scipy.io import mmread
-import networkx as nx
+
 import numpy as np
 import pytest
+from scipy.io import mmread
+
+import cudf
+import cugraph
+
+# Temporarily suppress warnings till networkX fixes deprecation warnings
+# (Using or importing the ABCs from 'collections' instead of from
+# 'collections.abc' is deprecated, and in 3.8 it will stop working) for
+# python 3.7. Also, this import networkx needs to be relocated in the
+# third-party group once this gets fixed.
+import warnings
+with warnings.catch_warnings():
+    warnings.filterwarnings("ignore", category=DeprecationWarning)
+    import networkx as nx
+
 
 print('Networkx version : {} '.format(nx.__version__))
 
 
-def ReadMtxFile(mmFile):
-    print('Reading ' + str(mmFile) + '...')
-    return mmread(mmFile).asfptype()
+def read_mtx_file(mm_file):
+    print('Reading ' + str(mm_file) + '...')
+    return mmread(mm_file).asfptype()
 
 
-def cugraph_Call(M, source):
+def cugraph_call(M, source):
 
     # Device data
     sources = cudf.Series(M.row)
@@ -56,7 +68,7 @@ def cugraph_Call(M, source):
 
     return distances
 
 
-def networkx_Call(M, source):
+def networkx_call(M, source):
 
     print('Format conversion ... ')
     M = M.tocsr()
@@ -71,7 +83,7 @@ def networkx_Call(M, source):
 
     print('NX Solving... ')
     t1 = time.time()
-    path=nx.single_source_shortest_path(Gnx, source)
+    path = nx.single_source_shortest_path(Gnx, source)
 
     t2 = time.time() - t1
 
@@ -80,20 +92,20 @@ def networkx_Call(M, source):
 
     return path
 
 
-datasets = ['/datasets/networks/dolphins.mtx',
+DATASETS = ['/datasets/networks/dolphins.mtx',
             '/datasets/networks/karate.mtx',
             '/datasets/golden_data/graphs/dblp.mtx']
 
-source = [1]
+SOURCES = [1]
 
 
-@pytest.mark.parametrize('graph_file', datasets)
-@pytest.mark.parametrize('source', source)
+@pytest.mark.parametrize('graph_file', DATASETS)
+@pytest.mark.parametrize('source', SOURCES)
 def test_sssp(graph_file, source):
-    M = ReadMtxFile(graph_file)
-    cu_paths = cugraph_Call(M, source)
-    nx_paths = networkx_Call(M, source)
+    M = read_mtx_file(graph_file)
+    cu_paths = cugraph_call(M, source)
+    nx_paths = networkx_call(M, source)
 
     # Calculating mismatch
     err = 0
diff --git a/python/setup.py b/python/setup.py
index d56904c1509..94e6358e930 100644
--- a/python/setup.py
+++ b/python/setup.py
@@ -1,30 +1,31 @@
 # Copyright (c) 2018, NVIDIA CORPORATION.
 
-from setuptools import setup, find_packages
+from distutils.sysconfig import get_python_lib
+import os
+from os.path import join as pjoin
+import sys
+
+from setuptools import setup
 from setuptools.extension import Extension
 from Cython.Build import cythonize
 import numpy
-
 import versioneer
-from distutils.sysconfig import get_python_lib
-from os.path import join as pjoin
-import os
-import sys
 
-install_requires = [
-    'numba',
-    'cython'
-]
+
+INSTALL_REQUIRES = ['numba', 'cython']
+
 
 def find_in_path(name, path):
     "Find a file in a search path"
-    #adapted fom http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
-    for dir in path.split(os.pathsep):
-        binpath = pjoin(dir, name)
+    # adapted from
+    # http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path
+    for directory in path.split(os.pathsep):
+        binpath = pjoin(directory, name)
         if os.path.exists(binpath):
             return os.path.abspath(binpath)
     return None
 
+
 def locate_cuda():
     """Locate the CUDA environment on the system
     Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
@@ -41,46 +42,55 @@ def locate_cuda():
         # otherwise, search the PATH for NVCC
         nvcc = find_in_path('nvcc', os.environ['PATH'])
         if nvcc is None:
-            raise EnvironmentError('The nvcc binary could not be '
-                'located in your $PATH. Either add it to your path, or set $CUDAHOME')
+            raise EnvironmentError(
+                'The nvcc binary could not be located in your $PATH. '
+                'Either add it to your path, or set $CUDAHOME')
         home = os.path.dirname(os.path.dirname(nvcc))
 
-    cudaconfig = {'home':home, 'nvcc':nvcc,
+    cudaconfig = {'home': home, 'nvcc': nvcc,
                   'include': pjoin(home, 'include'),
                   'lib64': pjoin(home, 'lib64')}
     for k, v in iter(cudaconfig.items()):
         if not os.path.exists(v):
-            raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
+            raise EnvironmentError(
+                'The CUDA %s path could not be located in %s' % (k, v))
 
     return cudaconfig
 
+
 def locate_nvgraph():
     if 'CONDA_PREFIX' in os.environ:
-        nvgraph_found = find_in_path('lib/libnvgraph_st.so', os.environ['CONDA_PREFIX'])
+        nvgraph_found = find_in_path('lib/libnvgraph_st.so',
+                                     os.environ['CONDA_PREFIX'])
     if nvgraph_found is None:
-        nvgraph_found = find_in_path('libnvgraph_st.so', os.environ['LD_LIBRARY_PATH'])
+        nvgraph_found = find_in_path('libnvgraph_st.so',
+                                     os.environ['LD_LIBRARY_PATH'])
         if nvgraph_found is None:
-            raise EnvironmentError('The nvgraph library could not be located')
-    nvgraph_config = {'include':pjoin(os.path.dirname(os.path.dirname(nvgraph_found)), 'include', 'nvgraph'),
-                      'lib':os.path.dirname(nvgraph_found)}
+            raise EnvironmentError('The nvgraph library could not be located')
+    nvgraph_config = {
+        'include': pjoin(os.path.dirname(os.path.dirname(nvgraph_found)),
+                         'include', 'nvgraph'),
+        'lib': os.path.dirname(nvgraph_found)}
+
     return nvgraph_config
-    
+
+
 CUDA = locate_cuda()
 NVGRAPH = locate_nvgraph()
 
 try:
-    numpy_include = numpy.get_include()
+    NUMPY_INCLUDE = numpy.get_include()
 except AttributeError:
-    numpy_include = numpy.get_numpy_include()
+    NUMPY_INCLUDE = numpy.get_numpy_include()
 
-cudf_include = os.path.normpath(sys.prefix) + '/include'
-cython_files = ['cugraph/*.pyx']
+CUDF_INCLUDE = os.path.normpath(sys.prefix) + '/include'
+CYTHON_FILES = ['cugraph/*.pyx']
 
-extensions = [
+EXTENSIONS = [
     Extension("cugraph",
-              sources=cython_files,
-              include_dirs=[numpy_include,
-                            cudf_include,
+              sources=CYTHON_FILES,
+              include_dirs=[NUMPY_INCLUDE,
+                            CUDF_INCLUDE,
                             NVGRAPH['include'],
                             CUDA['include'],
                             '../cpp/src',
@@ -98,19 +108,18 @@ def locate_nvgraph():
       description="cuGraph - GPU Graph Analytics",
       version=versioneer.get_version(),
       classifiers=[
-        # "Development Status :: 4 - Beta",
-        "Intended Audience :: Developers",
-        # "Operating System :: OS Independent",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7"
+          # "Development Status :: 4 - Beta",
+          "Intended Audience :: Developers",
+          # "Operating System :: OS Independent",
+          "Programming Language :: Python",
+          "Programming Language :: Python :: 3.6",
+          "Programming Language :: Python :: 3.7"
       ],
       # Include the separately-compiled shared library
       author="NVIDIA Corporation",
       setup_requires=['cython'],
-      ext_modules=cythonize(extensions),
-      install_requires=install_requires,
+      ext_modules=cythonize(EXTENSIONS),
+      install_requires=INSTALL_REQUIRES,
       license="Apache",
       cmdclass=versioneer.get_cmdclass(),
-      zip_safe=False
-      )
+      zip_safe=False)
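
An aside on find_in_path in setup.py: for executables specifically, the standard library already provides the same PATH walk. A minimal equivalent of the nvcc lookup (illustration, not part of the patch):

```python
import os
import shutil

# shutil.which performs the PATH scan find_in_path implements by hand,
# returning the absolute path of the executable or None if absent.
nvcc = shutil.which('nvcc')
if nvcc is None and 'CUDAHOME' in os.environ:
    # Fall back to the explicit CUDA home, as locate_cuda does.
    nvcc = os.path.join(os.environ['CUDAHOME'], 'bin', 'nvcc')
print(nvcc)
```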