From 0c6aff1bd439427270105813ac07046fafe02d9b Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Tue, 25 Jul 2023 06:09:35 +0100 Subject: [PATCH] Refactor RMAT (#3662) This PR refactors the RMAT generator, update docstrings and tests Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3662 --- cpp/CMakeLists.txt | 1 - cpp/include/cugraph/graph_generators.hpp | 38 +- cpp/include/cugraph/utilities/cython.hpp | 71 --- cpp/include/cugraph_c/graph_generators.h | 8 + cpp/src/c_api/graph_generators.cpp | 13 +- cpp/src/generators/generate_rmat_edgelist.cu | 58 +- cpp/src/utilities/cython.cu | 167 ----- cpp/tests/c_api/generate_rmat_test.c | 2 + python/cugraph/CMakeLists.txt | 1 - python/cugraph/cugraph/dask/comms/comms.pxd | 10 +- .../cugraph/dask/comms/comms_wrapper.pyx | 6 +- .../cugraph/cugraph/generators/CMakeLists.txt | 22 - python/cugraph/cugraph/generators/rmat.pxd | 53 -- python/cugraph/cugraph/generators/rmat.py | 580 ++++++++++++++---- .../cugraph/generators/rmat_wrapper.pyx | 165 ----- .../cugraph/structure/graph_utilities.pxd | 6 - .../cugraph/tests/generators/test_rmat.py | 300 ++++++++- .../pylibcugraph/pylibcugraph/CMakeLists.txt | 2 + python/pylibcugraph/pylibcugraph/__init__.py | 4 + .../_cugraph_c/graph_generators.pxd | 11 +- .../_cugraph_c/resource_handle.pxd | 4 +- python/pylibcugraph/pylibcugraph/bfs.pyx | 4 - .../pylibcugraph/components/_connectivity.pxd | 8 - .../pylibcugraph/generate_rmat_edgelist.pyx | 246 ++++++++ .../pylibcugraph/generate_rmat_edgelists.pyx | 273 +++++++++ python/pylibcugraph/pylibcugraph/graphs.pyx | 17 - python/pylibcugraph/pylibcugraph/hits.pyx | 2 +- .../pylibcugraph/katz_centrality.pyx | 4 +- python/pylibcugraph/pylibcugraph/node2vec.pyx | 3 +- python/pylibcugraph/pylibcugraph/pagerank.pyx | 6 - 
.../pylibcugraph/personalized_pagerank.pyx | 6 - .../structure/graph_utilities.pxd | 53 -- .../pylibcugraph/tests/test_rmat.py | 99 +++ .../pylibcugraph/triangle_count.pyx | 3 +- .../pylibcugraph/two_hop_neighbors.pyx | 5 +- .../pylibcugraph/uniform_neighbor_sample.pyx | 8 - .../pylibcugraph/uniform_random_walks.pyx | 10 +- python/pylibcugraph/pylibcugraph/utils.pyx | 11 +- .../weakly_connected_components.pyx | 6 +- 39 files changed, 1485 insertions(+), 801 deletions(-) delete mode 100644 cpp/include/cugraph/utilities/cython.hpp delete mode 100644 cpp/src/utilities/cython.cu delete mode 100644 python/cugraph/cugraph/generators/CMakeLists.txt delete mode 100644 python/cugraph/cugraph/generators/rmat.pxd delete mode 100644 python/cugraph/cugraph/generators/rmat_wrapper.pyx create mode 100644 python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx create mode 100644 python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx delete mode 100644 python/pylibcugraph/pylibcugraph/structure/graph_utilities.pxd create mode 100644 python/pylibcugraph/pylibcugraph/tests/test_rmat.py diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 8fedfbb23f3..90f33574d6f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -192,7 +192,6 @@ set(CUGRAPH_SOURCES src/community/detail/mis_mg.cu src/detail/utility_wrappers.cu src/structure/graph_view_mg.cu - src/utilities/cython.cu src/utilities/path_retrieval.cu src/structure/legacy/graph.cu src/linear_assignment/legacy/hungarian.cu diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp index 4944e0f4917..5e8e97c51a2 100644 --- a/cpp/include/cugraph/graph_generators.hpp +++ b/cpp/include/cugraph/graph_generators.hpp @@ -66,6 +66,9 @@ namespace cugraph { * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to * `false`). 
+ * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values + * and vertex degrees. * @return std::tuple, rmm::device_uvector> A tuple of * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. */ @@ -74,11 +77,12 @@ std::tuple, rmm::device_uvector> generat raft::handle_t const& handle, size_t scale, size_t num_edges, - double a = 0.57, - double b = 0.19, - double c = 0.19, - uint64_t seed = 0, - bool clip_and_flip = false); + double a = 0.57, + double b = 0.19, + double c = 0.19, + uint64_t seed = 0, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); /** * @brief generate an edge list for an R-mat graph. @@ -113,6 +117,9 @@ std::tuple, rmm::device_uvector> generat * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values + * and vertex degrees. * @return std::tuple, rmm::device_uvector> A tuple of * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. */ @@ -122,10 +129,11 @@ std::tuple, rmm::device_uvector> generat raft::random::RngState& rng_state, size_t scale, size_t num_edges, - double a = 0.57, - double b = 0.19, - double c = 0.19, - bool clip_and_flip = false); + double a = 0.57, + double b = 0.19, + double c = 0.19, + bool clip_and_flip = false, + bool scramble_vertex_ids = false); /** * @brief generate an edge list for a bipartite R-mat graph. 
@@ -199,6 +207,9 @@ enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values + * and vertex degrees. * @return A vector of std::tuple, rmm::device_uvector> of *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge *source vertex IDs and edge destination vertex IDs. @@ -214,7 +225,8 @@ generate_rmat_edgelists( generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, uint64_t seed = 0, - bool clip_and_flip = false); + bool clip_and_flip = false, + bool scramble_vertex_ids = false); /** * @brief generate multiple edge lists using the R-mat graph generator. @@ -245,6 +257,9 @@ generate_rmat_edgelists( * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to * `false`). + * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) + * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values + * and vertex degrees. * @return A vector of std::tuple, rmm::device_uvector> of *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge *source vertex IDs and edge destination vertex IDs. 
@@ -260,7 +275,8 @@ generate_rmat_edgelists( size_t edge_factor = 16, generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, - bool clip_and_flip = false); + bool clip_and_flip = false, + bool scramble_vertex_ids = false); /** * @brief generate an edge list for path graph diff --git a/cpp/include/cugraph/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp deleted file mode 100644 index 2573752cb98..00000000000 --- a/cpp/include/cugraph/utilities/cython.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include - -#include - -#include - -namespace cugraph { -namespace cython { - -struct graph_generator_t { - std::unique_ptr d_source; - std::unique_ptr d_destination; -}; - -// Wrapper for calling graph generator -template -std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a, - double b, - double c, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); -template -std::vector, std::unique_ptr>> -call_generate_rmat_edgelists(raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::generator_distribution_t size_distribution, - cugraph::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); - -// Helper for setting up subcommunicators, typically called as part of the -// user-initiated comms initialization in Python. -// -// raft::handle_t& handle -// Raft handle for which the new subcommunicators will be created. The -// subcommunicators will then be accessible from the handle passed to the -// parallel processes. -// -// size_t row_comm_size -// Number of items in a partition row (ie. pcols), needed for creating the -// appropriate number of subcommunicator instances. -void init_subcomms(raft::handle_t& handle, size_t row_comm_size); - -} // namespace cython -} // namespace cugraph diff --git a/cpp/include/cugraph_c/graph_generators.h b/cpp/include/cugraph_c/graph_generators.h index 9da6ee48fc7..36cded27ba6 100644 --- a/cpp/include/cugraph_c/graph_generators.h +++ b/cpp/include/cugraph_c/graph_generators.h @@ -136,6 +136,9 @@ void cugraph_coo_list_free(cugraph_coo_list_t* coo_list); * @param [in] clip_and_flip Flag controlling whether to generate edges only in the lower * triangular part (including the diagonal) of the graph adjacency matrix (if set to `true`) or not * (if set to `false`). 
+ * @param [in] scramble_vertex_ids Flag controlling whether to scramble vertex ID bits + * (if set to `true`) or not (if set to `false`); scrambling vertex ID bits breaks correlation + * between vertex ID values and vertex degrees. * @param [out] result Opaque pointer to generated coo * @param [out] error Pointer to an error object storing details of any error. Will * be populated if error code is not CUGRAPH_SUCCESS @@ -149,6 +152,7 @@ cugraph_error_code_t cugraph_generate_rmat_edgelist(const cugraph_resource_handl double b, double c, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_t** result, cugraph_error_t** error); @@ -172,6 +176,9 @@ cugraph_error_code_t cugraph_generate_rmat_edgelist(const cugraph_resource_handl * @param [in] clip_and_flip Flag controlling whether to generate edges only in the lower * triangular part (including the diagonal) of the graph adjacency matrix (if set to `true`) or not * (if set to `false`). + * @param [in] scramble_vertex_ids Flag controlling whether to scramble vertex ID bits + * (if set to `true`) or not (if set to `false`); scrambling vertex ID bits breaks correlation + * between vertex ID values and vertex degrees. * @param [out] result Opaque pointer to generated coo list * @param [out] error Pointer to an error object storing details of any error. 
Will * be populated if error code is not CUGRAPH_SUCCESS @@ -187,6 +194,7 @@ cugraph_error_code_t cugraph_generate_rmat_edgelists( cugraph_generator_distribution_t size_distribution, cugraph_generator_distribution_t edge_distribution, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_list_t** result, cugraph_error_t** error); diff --git a/cpp/src/c_api/graph_generators.cpp b/cpp/src/c_api/graph_generators.cpp index 38f91b3135b..adf49de7afe 100644 --- a/cpp/src/c_api/graph_generators.cpp +++ b/cpp/src/c_api/graph_generators.cpp @@ -56,12 +56,13 @@ cugraph_error_code_t cugraph_generate_rmat_edgelist(raft::handle_t const& handle double b, double c, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph::c_api::cugraph_coo_t** result, cugraph::c_api::cugraph_error_t** error) { try { auto [src, dst] = cugraph::generate_rmat_edgelist( - handle, rng_state, scale, num_edges, a, b, c, clip_and_flip); + handle, rng_state, scale, num_edges, a, b, c, clip_and_flip, scramble_vertex_ids); *result = new cugraph::c_api::cugraph_coo_t{ std::make_unique(src, vertex_dtype), @@ -90,6 +91,7 @@ cugraph_error_code_t cugraph_generate_rmat_edgelists( cugraph_generator_distribution_t size_distribution, cugraph_generator_distribution_t edge_distribution, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph::c_api::cugraph_coo_list_t** result, cugraph::c_api::cugraph_error_t** error) { @@ -103,7 +105,8 @@ cugraph_error_code_t cugraph_generate_rmat_edgelists( edge_factor, static_cast(size_distribution), static_cast(edge_distribution), - clip_and_flip); + clip_and_flip, + scramble_vertex_ids); *result = new cugraph::c_api::cugraph_coo_list_t; (*result)->list_.resize(tuple_vector.size()); @@ -200,6 +203,7 @@ extern "C" cugraph_error_code_t cugraph_generate_rmat_edgelist( double b, double c, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_t** result, cugraph_error_t** error) { @@ -219,6 +223,7 @@ extern "C" cugraph_error_code_t 
cugraph_generate_rmat_edgelist( b, c, clip_and_flip, + scramble_vertex_ids, reinterpret_cast(result), reinterpret_cast(error)); } else { @@ -232,6 +237,7 @@ extern "C" cugraph_error_code_t cugraph_generate_rmat_edgelist( b, c, clip_and_flip, + scramble_vertex_ids, reinterpret_cast(result), reinterpret_cast(error)); } @@ -247,6 +253,7 @@ extern "C" cugraph_error_code_t cugraph_generate_rmat_edgelists( cugraph_generator_distribution_t size_distribution, cugraph_generator_distribution_t edge_distribution, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_list_t** result, cugraph_error_t** error) { @@ -267,6 +274,7 @@ extern "C" cugraph_error_code_t cugraph_generate_rmat_edgelists( size_distribution, edge_distribution, clip_and_flip, + scramble_vertex_ids, reinterpret_cast(result), reinterpret_cast(error)); } else { @@ -281,6 +289,7 @@ extern "C" cugraph_error_code_t cugraph_generate_rmat_edgelists( size_distribution, edge_distribution, clip_and_flip, + scramble_vertex_ids, reinterpret_cast(result), reinterpret_cast(error)); } diff --git a/cpp/src/generators/generate_rmat_edgelist.cu b/cpp/src/generators/generate_rmat_edgelist.cu index 6789306ab21..bcafd2661d5 100644 --- a/cpp/src/generators/generate_rmat_edgelist.cu +++ b/cpp/src/generators/generate_rmat_edgelist.cu @@ -42,7 +42,8 @@ std::tuple, rmm::device_uvector> generat double a, double b, double c, - bool clip_and_flip) + bool clip_and_flip, + bool scramble_vertex_ids) { CUGRAPH_EXPECTS((size_t{1} << scale) <= static_cast(std::numeric_limits::max()), "Invalid input argument: scale too large for vertex_t."); @@ -104,7 +105,11 @@ std::tuple, rmm::device_uvector> generat num_edges_generated += num_edges_to_generate; } - return std::make_tuple(std::move(srcs), std::move(dsts)); + if (scramble_vertex_ids) { + return cugraph::scramble_vertex_ids(handle, std::move(srcs), std::move(dsts), scale); + } else { + return std::make_tuple(std::move(srcs), std::move(dsts)); + } } template @@ -116,12 +121,13 @@ 
std::tuple, rmm::device_uvector> generat double b, double c, uint64_t seed, - bool clip_and_flip) + bool clip_and_flip, + bool scramble_vertex_ids) { raft::random::RngState rng_state(seed); return generate_rmat_edgelist( - handle, rng_state, scale, num_edges, a, b, c, clip_and_flip); + handle, rng_state, scale, num_edges, a, b, c, clip_and_flip, scramble_vertex_ids); } template @@ -134,7 +140,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, size_t edge_factor, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, - bool clip_and_flip) + bool clip_and_flip, + bool scramble_vertex_ids) { CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); CUGRAPH_EXPECTS( @@ -181,8 +188,15 @@ generate_rmat_edgelists(raft::handle_t const& handle, } for (size_t i = 0; i < n_edgelists; i++) { - output.push_back(generate_rmat_edgelist( - handle, rng_state, scale[i], scale[i] * edge_factor, a, b, c, clip_and_flip)); + output.push_back(generate_rmat_edgelist(handle, + rng_state, + scale[i], + scale[i] * edge_factor, + a, + b, + c, + clip_and_flip, + scramble_vertex_ids)); } return output; } @@ -197,7 +211,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip) + bool clip_and_flip, + bool scramble_vertex_ids) { raft::random::RngState rng_state(seed); @@ -209,7 +224,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, edge_factor, size_distribution, edge_distribution, - clip_and_flip); + clip_and_flip, + scramble_vertex_ids); } template std::tuple, rmm::device_uvector> @@ -220,7 +236,8 @@ generate_rmat_edgelist(raft::handle_t const& handle, double a, double b, double c, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -230,7 +247,8 @@ generate_rmat_edgelist(raft::handle_t const& handle, 
double a, double b, double c, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -241,7 +259,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, size_t edge_factor, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -252,7 +271,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, size_t edge_factor, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); /* DEPRECATED */ template std::tuple, rmm::device_uvector> @@ -263,7 +283,8 @@ generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -273,7 +294,8 @@ generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -284,7 +306,8 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip); + bool clip_and_flip, + bool scramble_vertex_ids); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -295,6 +318,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t size_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip); + bool clip_and_flip, 
+ bool scramble_vertex_ids); } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu deleted file mode 100644 index 36e231ad570..00000000000 --- a/cpp/src/utilities/cython.cu +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include - -#include - -#include - -#include -#include - -namespace cugraph { -namespace cython { - -// Wrapper for graph generate_rmat_edgelist() -// to expose the API to cython -// enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; -template -std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a, - double b, - double c, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) -{ - auto src_dst_tuple = cugraph::generate_rmat_edgelist( - handle, scale, num_edges, a, b, c, seed, clip_and_flip); - - if (scramble_vertex_ids) { - src_dst_tuple = cugraph::scramble_vertex_ids( - handle, std::move(std::get<0>(src_dst_tuple)), std::move(std::get<1>(src_dst_tuple)), scale); - } - - graph_generator_t gg_vals{ - std::make_unique(std::get<0>(src_dst_tuple).release()), - std::make_unique(std::get<1>(src_dst_tuple).release())}; - - return std::make_unique(std::move(gg_vals)); -} - -template -std::vector, std::unique_ptr>> -call_generate_rmat_edgelists(raft::handle_t const& handle, - size_t n_edgelists, - 
size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::generator_distribution_t size_distribution, - cugraph::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) -{ - auto src_dst_vec_tuple = cugraph::generate_rmat_edgelists(handle, - n_edgelists, - min_scale, - max_scale, - edge_factor, - size_distribution, - edge_distribution, - seed, - clip_and_flip); - - if (scramble_vertex_ids) { - std::for_each(src_dst_vec_tuple.begin(), - src_dst_vec_tuple.end(), - [&handle, max_scale, seed](auto& src_dst_tuple) { - src_dst_tuple = - cugraph::scramble_vertex_ids(handle, - std::move(std::get<0>(src_dst_tuple)), - std::move(std::get<1>(src_dst_tuple)), - max_scale); - }); - } - - std::vector, std::unique_ptr>> - gg_vec; - - std::transform( - src_dst_vec_tuple.begin(), - src_dst_vec_tuple.end(), - std::back_inserter(gg_vec), - [](auto& tpl_dev_uvec) { - return std::make_pair( - std::move(std::make_unique(std::get<0>(tpl_dev_uvec).release())), - std::move(std::make_unique(std::get<1>(tpl_dev_uvec).release()))); - }); - - return gg_vec; -} - -// Helper for setting up subcommunicators -void init_subcomms(raft::handle_t& handle, size_t row_comm_size) -{ - partition_manager::init_subcomm(handle, row_comm_size); -} - -template std::unique_ptr call_generate_rmat_edgelist( - raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a, - double b, - double c, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); - -template std::unique_ptr call_generate_rmat_edgelist( - raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a, - double b, - double c, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); - -template std::vector< - std::pair, std::unique_ptr>> -call_generate_rmat_edgelists(raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::generator_distribution_t 
size_distribution, - cugraph::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); - -template std::vector< - std::pair, std::unique_ptr>> -call_generate_rmat_edgelists(raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - cugraph::generator_distribution_t size_distribution, - cugraph::generator_distribution_t edge_distribution, - uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); - -} // namespace cython -} // namespace cugraph diff --git a/cpp/tests/c_api/generate_rmat_test.c b/cpp/tests/c_api/generate_rmat_test.c index 1738f080370..442031ff054 100644 --- a/cpp/tests/c_api/generate_rmat_test.c +++ b/cpp/tests/c_api/generate_rmat_test.c @@ -56,6 +56,7 @@ int test_rmat_generation() 0.19, 0.19, FALSE, + FALSE, &coo, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "generate_rmat_edgelist failed."); @@ -143,6 +144,7 @@ int test_rmat_list_generation() UNIFORM, POWER_LAW, FALSE, + FALSE, &coo_list, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "generate_rmat_edgelist failed."); diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index f405ad4f360..48815792553 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -87,7 +87,6 @@ add_subdirectory(cugraph/community) add_subdirectory(cugraph/components) add_subdirectory(cugraph/dask/comms) add_subdirectory(cugraph/dask/structure) -add_subdirectory(cugraph/generators) add_subdirectory(cugraph/internals) add_subdirectory(cugraph/layout) add_subdirectory(cugraph/linear_assignment) diff --git a/python/cugraph/cugraph/dask/comms/comms.pxd b/python/cugraph/cugraph/dask/comms/comms.pxd index 3f8f8c2ca59..0b363dc047d 100644 --- a/python/cugraph/cugraph/dask/comms/comms.pxd +++ b/python/cugraph/cugraph/dask/comms/comms.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,8 +18,6 @@ from pylibraft.common.handle cimport * - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - - cdef void init_subcomms(handle_t &handle, - size_t row_comm_size) +cdef extern from "cugraph/partition_manager.hpp" namespace "cugraph::partition_manager": + cdef void init_subcomm(handle_t &handle, + size_t row_comm_size) diff --git a/python/cugraph/cugraph/dask/comms/comms_wrapper.pyx b/python/cugraph/cugraph/dask/comms/comms_wrapper.pyx index 7995c756eef..bbd462c442f 100644 --- a/python/cugraph/cugraph/dask/comms/comms_wrapper.pyx +++ b/python/cugraph/cugraph/dask/comms/comms_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. +# Copyright (c) 2020-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,10 +18,10 @@ from pylibraft.common.handle cimport * -from cugraph.dask.comms.comms cimport init_subcomms as c_init_subcomms +from cugraph.dask.comms.comms cimport init_subcomm as c_init_subcomm def init_subcomms(handle, row_comm_size): cdef size_t handle_size_t = handle.getHandle() handle_ = handle_size_t - c_init_subcomms(handle_[0], row_comm_size) + c_init_subcomm(handle_[0], row_comm_size) diff --git a/python/cugraph/cugraph/generators/CMakeLists.txt b/python/cugraph/cugraph/generators/CMakeLists.txt deleted file mode 100644 index 037e5254f21..00000000000 --- a/python/cugraph/cugraph/generators/CMakeLists.txt +++ /dev/null @@ -1,22 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022, NVIDIA CORPORATION. 
-# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources rmat_wrapper.pyx) -set(linked_libraries cugraph::cugraph) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX generators_ - ASSOCIATED_TARGETS cugraph -) diff --git a/python/cugraph/cugraph/generators/rmat.pxd b/python/cugraph/cugraph/generators/rmat.pxd deleted file mode 100644 index 7c3a4165e3e..00000000000 --- a/python/cugraph/cugraph/generators/rmat.pxd +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from libcpp.memory cimport unique_ptr -from libcpp cimport bool -from libcpp.vector cimport vector -from libcpp.utility cimport pair - -from rmm._lib.device_buffer cimport device_buffer - -from pylibraft.common.handle cimport handle_t -from cugraph.structure.graph_utilities cimport graph_generator_t - - -cdef extern from "cugraph/graph_generators.hpp" namespace "cugraph": - ctypedef enum generator_distribution_t: - POWER_LAW "cugraph::generator_distribution_t::POWER_LAW" - UNIFORM "cugraph::generator_distribution_t::UNIFORM" - - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef unique_ptr[graph_generator_t] call_generate_rmat_edgelist[vertex_t] ( - const handle_t &handle, - size_t scale, - size_t num_edges, - double a, - double b, - double c, - int seed, - bool clip_and_flip, - bool scramble_vertex_ids) except + - - cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] call_generate_rmat_edgelists[vertex_t]( - const handle_t &handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor, - generator_distribution_t size_distribution, - generator_distribution_t edge_distribution, - int seed, - bool clip_and_flip, - bool scramble_vertex_ids) except + diff --git a/python/cugraph/cugraph/generators/rmat.py b/python/cugraph/cugraph/generators/rmat.py index e9f7515e92e..7a37e9bdaf2 100644 --- a/python/cugraph/cugraph/generators/rmat.py +++ b/python/cugraph/cugraph/generators/rmat.py @@ -11,31 +11,51 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from dask.distributed import default_client +from dask.distributed import default_client, wait import dask_cudf -from cugraph.generators import rmat_wrapper from cugraph.dask.comms import comms as Comms +import cudf +import numpy as np +import cupy as cp import cugraph +from pylibcugraph import generate_rmat_edgelist as pylibcugraph_generate_rmat_edgelist +from pylibcugraph import generate_rmat_edgelists as pylibcugraph_generate_rmat_edgelists +from pylibcugraph import ResourceHandle _graph_types = [cugraph.Graph, cugraph.MultiGraph] def _ensure_args_rmat( - scale, - num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids, - create_using, - mg, + scale=None, + num_edges=None, + a=None, + b=None, + c=None, + n_edgelists=None, + min_scale=None, + max_scale=None, + edge_factor=None, + size_distribution=None, + edge_distribution=None, + seed=None, + clip_and_flip=None, + scramble_vertex_ids=None, + include_edge_weights=None, + minimum_weight=None, + maximum_weight=None, + dtype=None, + include_edge_ids=None, + include_edge_types=None, + min_edge_type_value=None, + max_edge_type_value=None, + create_using=None, + mg=None, + multi_rmat=False, ): """ - Ensures the args passed in are usable for the rmat() API, raises the - appropriate exception if incorrect, else returns None. + Ensures the args passed in are usable for the rmat() or multi rmat() API, + raises the appropriate exception if incorrect, else returns None. 
""" if create_using is not None: if isinstance(create_using, cugraph.Graph): @@ -52,54 +72,65 @@ def _ensure_args_rmat( "(or subclass) type or instance, got: " f"{type(create_using)}" ) - if not isinstance(scale, int): - raise TypeError("'scale' must be an int") - if not isinstance(num_edges, int): - raise TypeError("'num_edges' must be an int") - if a + b + c > 1: - raise ValueError("a + b + c should be non-negative and no larger than 1.0") - if clip_and_flip not in [True, False]: - raise ValueError("'clip_and_flip' must be a bool") - if scramble_vertex_ids not in [True, False]: - raise ValueError("'scramble_vertex_ids' must be a bool") - if not isinstance(seed, int): - raise TypeError("'seed' must be an int") - - -def _ensure_args_multi_rmat( - n_edgelists, - min_scale, - max_scale, - edge_factor, - size_distribution, - edge_distribution, - seed, - clip_and_flip, - scramble_vertex_ids, -): - """ - Ensures the args passed in are usable for the multi_rmat() API, raises the - appropriate exception if incorrect, else returns None. 
- """ - if not isinstance(n_edgelists, int): - raise TypeError("'n_edgelists' must be an int") - if not isinstance(min_scale, int): - raise TypeError("'min_scale' must be an int") - if not isinstance(max_scale, int): - raise TypeError("'max_scale' must be an int") - if not isinstance(edge_factor, int): - raise TypeError("'edge_factor' must be an int") - if size_distribution not in [0, 1]: - raise TypeError("'size_distribution' must be either 0 or 1") - if edge_distribution not in [0, 1]: - raise TypeError("'edge_distribution' must be either 0 or 1") - if clip_and_flip not in [True, False]: - raise ValueError("'clip_and_flip' must be a bool") - if scramble_vertex_ids not in [True, False]: - raise ValueError("'scramble_vertex_ids' must be a bool") if not isinstance(seed, int): raise TypeError("'seed' must be an int") + if include_edge_weights: + if not isinstance(include_edge_weights, bool): + raise TypeError("'include_edge_weights' must be a bool") + if maximum_weight is None or minimum_weight is None: + raise ValueError( + "'maximum_weight' and 'minimum_weight' must not be 'None' " + "if 'include_edge_weights' is True" + ) + if dtype not in [ + np.float32, + np.float64, + cp.float32, + cp.float64, + "float32", + "float64", + ]: + raise ValueError( + "dtype must be either numpy or cupy 'float32' or 'float64' if " + "'include_edge_weights' is True." 
+ ) + if include_edge_ids: + if not isinstance(include_edge_ids, bool): + raise TypeError("'include_edge_ids' must be a bool") + if include_edge_types: + if not isinstance(include_edge_types, bool): + raise TypeError("'include_edge_types' must be a bool") + if min_edge_type_value is None and max_edge_type_value is None: + raise ValueError( + "'min_edge_type_value' and 'max_edge_type_value' must not be 'None' " + "if 'include_edge_types' is True" + ) + + if multi_rmat: + if not isinstance(n_edgelists, int): + raise TypeError("'n_edgelists' must be an int") + if not isinstance(min_scale, int): + raise TypeError("'min_scale' must be an int") + if not isinstance(max_scale, int): + raise TypeError("'max_scale' must be an int") + if not isinstance(edge_factor, int): + raise TypeError("'edge_factor' must be an int") + if size_distribution not in [0, 1]: + raise TypeError("'size_distribution' must be either 0 or 1") + if edge_distribution not in [0, 1]: + raise TypeError("'edge_distribution' must be either 0 or 1") + else: + if not isinstance(scale, int): + raise TypeError("'scale' must be an int") + if not isinstance(num_edges, int): + raise TypeError("'num_edges' must be an int") + if a + b + c > 1: + raise ValueError("a + b + c should be non-negative and no larger than 1.0") + if not isinstance(clip_and_flip, bool): + raise TypeError("'clip_and_flip' must be a bool") + if not isinstance(scramble_vertex_ids, bool): + raise TypeError("'scramble_vertex_ids' must be a bool") def _sg_rmat( @@ -111,6 +142,14 @@ def _sg_rmat( seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, create_using=cugraph.Graph, ): """ @@ -118,9 +157,48 @@ def _sg_rmat( to initialize and return a cugraph Graph object specified with create_using. If create_using is None, returns the edgelist df as-is. 
""" - df = rmat_wrapper.generate_rmat_edgelist( - scale, num_edges, a, b, c, seed, clip_and_flip, scramble_vertex_ids + + # FIXME: add deprecation warning for the parameter 'seed' and rename it + # 'random_state' + random_state = seed + multi_gpu = False + src, dst, weights, edge_id, edge_type = pylibcugraph_generate_rmat_edgelist( + ResourceHandle(), + random_state, + scale, + num_edges, + a, + b, + c, + clip_and_flip, + scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, + multi_gpu, ) + + df = cudf.DataFrame() + df["src"] = src + df["dst"] = dst + + if include_edge_weights: + df["weights"] = weights + weights = "weights" + + if include_edge_ids: + df["edge_id"] = edge_id + edge_id = "edge_id" + + if include_edge_types: + df["edge_type"] = edge_type + edge_type = "edge_type" + if create_using is None: return df @@ -135,11 +213,36 @@ def _sg_rmat( "(or subclass) type or instance, got: " f"{type(create_using)}" ) - G.from_cudf_edgelist(df, source="src", destination="dst", renumber=False) + G.from_cudf_edgelist( + df, + source="src", + destination="dst", + weight=weights, + edge_id=edge_id, + edge_type=edge_type, + renumber=False, + ) return G +def convert_to_cudf(cp_arrays): + cp_src, cp_dst, cp_edge_weights, cp_edge_ids, cp_edge_types = cp_arrays + + df = cudf.DataFrame() + df["src"] = cp_src + df["dst"] = cp_dst + + if cp_edge_weights is not None: + df["weights"] = cp_edge_weights + if cp_edge_ids is not None: + df["edge_id"] = cp_edge_ids + if cp_edge_types is not None: + df["edge_type"] = cp_edge_types + + return df + + def _mg_rmat( scale, num_edges, @@ -149,6 +252,14 @@ def _mg_rmat( seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, create_using=cugraph.Graph, ): """ @@ -163,7 +274,7 @@ 
def _mg_rmat( worker_list = list(client.scheduler_info()["workers"].keys()) num_workers = len(worker_list) num_edges_list = _calc_num_edges_per_worker(num_workers, num_edges) - futures = [] + result = [] for (i, worker_num_edges) in enumerate(num_edges_list): unique_worker_seed = seed + i future = client.submit( @@ -177,11 +288,25 @@ def _mg_rmat( unique_worker_seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, workers=worker_list[i], ) - futures.append(future) + result.append(future) + + wait(result) - ddf = dask_cudf.from_delayed(futures) + cudf_result = [client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result) if create_using is None: return ddf @@ -197,7 +322,28 @@ def _mg_rmat( "(or subclass) type or instance, got: " f"{type(create_using)}" ) - G.from_dask_cudf_edgelist(ddf, source="src", destination="dst") + + weights = None + edge_id = None + edge_type = None + + if "weights" in ddf.columns: + weights = "weights" + + if "edge_id" in ddf.columns: + edge_id = "edge_id" + + if "edge_type" in ddf.columns: + edge_type = "edge_type" + + G.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + weight=weights, + edge_id=edge_id, + edge_type=edge_type, + ) return G @@ -209,26 +355,43 @@ def _call_rmat( a, b, c, - unique_worker_seed, + random_state, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, ): """ Callable passed to dask client.submit calls that extracts the individual worker handle based on the dask session ID """ - handle = Comms.get_handle(sID) + multi_gpu = True - return rmat_wrapper.generate_rmat_edgelist( + return pylibcugraph_generate_rmat_edgelist( + 
ResourceHandle(Comms.get_handle(sID).getHandle()), + random_state, scale, num_edges_for_worker, a, b, c, - unique_worker_seed, clip_and_flip, scramble_vertex_ids, - handle=handle, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, + multi_gpu, ) @@ -254,12 +417,20 @@ def _calc_num_edges_per_worker(num_workers, num_edges): def rmat( scale, num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids, + a=0.57, + b=0.19, + c=0.19, + seed=42, + clip_and_flip=False, + scramble_vertex_ids=False, + include_edge_weights=False, + minimum_weight=None, + maximum_weight=None, + dtype=None, + include_edge_ids=False, + include_edge_types=False, + min_edge_type_value=None, + max_edge_type_value=None, create_using=cugraph.Graph, mg=False, ): @@ -270,38 +441,71 @@ def rmat( Parameters ---------- scale : int - Scale factor to set the number of vertices in the graph Vertex IDs have - values in [0, V), where V = 1 << 'scale' + Scale factor to set the number of vertices in the graph. Vertex IDs have + values in [0, V), where V = 1 << 'scale'. num_edges : int Number of edges to generate - a : float + a : float, optional (default=0.57) Probability of the edge being in the first partition - The Graph 500 spec sets this value to 0.57 + The Graph 500 spec sets this value to 0.57. - b : float + b : float, optional (default=0.19) Probability of the edge being in the second partition - The Graph 500 spec sets this value to 0.19 + The Graph 500 spec sets this value to 0.19. - c : float + c : float, optional (default=0.19) Probability of the edge being in the third partition - The Graph 500 spec sets this value to 0.19 + The Graph 500 spec sets this value to 0.19. - seed : int - Seed value for the random number generator + seed : int, optional (default=42) + Seed value for the random number generator. 
- clip_and_flip : bool + clip_and_flip : bool, optional (default=False) Flag controlling whether to generate edges only in the lower triangular part (including the diagonal) of the graph adjacency matrix - (if set to 'true') or not (if set to 'false). + (if set to True) or not (if set to False). - scramble_vertex_ids : bool + scramble_vertex_ids : bool, optional (default=False) Flag controlling whether to scramble vertex ID bits (if set to `true`) or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID values and vertex degrees. + include_edge_weights : bool, optional (default=False) + Flag controlling whether to generate edges with weights + (if set to True) or not (if set to False). + + minimum_weight : float + Minimum weight value to generate if 'include_edge_weights' is True + otherwise, this parameter is ignored. + + maximum_weight : float + Maximum weight value to generate if 'include_edge_weights' is True + otherwise, this parameter is ignored. + + dtype : numpy.float32, numpy.float64, cupy.float32, cupy.float64, + "float32", "float64" + The type of weight to generate which is ignored unless + 'include_edge_weights' is True. + + include_edge_ids : bool, optional (default=False) + Flag controlling whether to generate edges with ids + (if set to True) or not (if set to False). + + include_edge_types : bool, optional (default=False) + Flag controlling whether to generate edges with types + (if set to True) or not (if set to False). + + min_edge_type_value : int + Minimum edge type to generate if 'include_edge_types' is True + otherwise, this parameter is ignored. + + max_edge_type_value : int + Maximum edge type to generate if 'include_edge_types' is True + otherwise, this parameter is ignored. + create_using : cugraph Graph type or None The graph type to construct containing the generated edges and vertices.
If None is specified, the edgelist cuDF DataFrame (or dask_cudf DataFrame for MG) is returned @@ -311,11 +515,11 @@ def rmat( mg : bool, optional (default=False) If True, R-MAT generation occurs across multiple GPUs. If False, only a - single GPU is used. Default is False (single-GPU) + single GPU is used. Default is False (single-GPU). Returns ------- - instance of cugraph.Graph + instance of cugraph.Graph or cudf or dask_cudf DataFrame Examples -------- @@ -339,16 +543,24 @@ def rmat( """ _ensure_args_rmat( - scale, - num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids, - create_using, - mg, + scale=scale, + num_edges=num_edges, + a=a, + b=b, + c=c, + seed=seed, + clip_and_flip=clip_and_flip, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_weights=include_edge_weights, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + dtype=dtype, + include_edge_ids=include_edge_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, + create_using=create_using, + mg=mg, ) if mg: @@ -361,6 +573,14 @@ def rmat( seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, create_using, ) else: @@ -373,6 +593,14 @@ def rmat( seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, create_using, ) @@ -384,9 +612,18 @@ def multi_rmat( edge_factor, size_distribution, edge_distribution, - seed, - clip_and_flip, - scramble_vertex_ids, + seed=42, + clip_and_flip=False, + scramble_vertex_ids=False, + include_edge_weights=False, + minimum_weight=None, + maximum_weight=None, + dtype=None, + include_edge_ids=False, + include_edge_types=False, + min_edge_type_value=None, + 
max_edge_type_value=None, + mg=False, ): """ Generate multiple Graph objects using a Recursive MATrix (R-MAT) graph @@ -395,13 +632,13 @@ def multi_rmat( Parameters ---------- n_edgelists : int - Number of edge lists (graphs) to generate + Number of edge lists (graphs) to generate. min_scale : int - Scale factor to set the minimum number of vertices in the graph + Scale factor to set the minimum number of vertices in the graph. max_scale : int - Scale factor to set the maximum number of vertices in the graph + Scale factor to set the maximum number of vertices in the graph. edge_factor : int Average number of edges per vertex to generate @@ -409,58 +646,143 @@ def multi_rmat( size_distribution : int Distribution of the graph sizes, impacts the scale parameter of the R-MAT generator. - '0' for POWER_LAW distribution and '1' for UNIFORM distribution + '0' for POWER_LAW distribution and '1' for UNIFORM distribution. edge_distribution : int Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, are set. - '0' for POWER_LAW distribution and '1' for UNIFORM distribution + '0' for POWER_LAW distribution and '1' for UNIFORM distribution. seed : int - Seed value for the random number generator + Seed value for the random number generator. - clip_and_flip : bool + clip_and_flip : bool, optional (default=False) Flag controlling whether to generate edges only in the lower triangular part (including the diagonal) of the graph adjacency matrix - (if set to 'true') or not (if set to 'false') + (if set to True) or not (if set to False). scramble_vertex_ids : bool - Flag controlling whether to scramble vertex ID bits (if set to 'true') - or not (if set to 'false'); scrambling vertx ID bits breaks correlation - between vertex ID values and vertex degrees + Flag controlling whether to scramble vertex ID bits (if set to True) + or not (if set to False); scrambling vertex ID bits breaks correlation + between vertex ID values and vertex degrees.
+ + include_edge_weights : bool, optional (default=False) + Flag controlling whether to generate edges with weights + (if set to True) or not (if set to False). + + minimum_weight : float + Minimum weight value to generate if 'include_edge_weights' is True + otherwise, this parameter is ignored. + + maximum_weight : float + Maximum weight value to generate if 'include_edge_weights' is True + otherwise, this parameter is ignored. + + include_edge_ids : bool, optional (default=False) + Flag controlling whether to generate edges with ids + (if set to True) or not (if set to False). + + include_edge_types : bool, optional (default=False) + Flag controlling whether to generate edges with types + (if set to True) or not (if set to False). + + min_edge_type_value : int + Minimum edge type to generate if 'include_edge_types' is True + otherwise, this parameter is ignored. + + max_edge_type_value : int + Maximum edge type to generate if 'include_edge_types' is True + otherwise, this parameter is ignored. + + dtype : numpy.float32, numpy.float64, cupy.float32, cupy.float64, + "float32", "float64" + The type of weight to generate which is ignored unless + 'include_edge_weights' is True. + + mg : bool, optional (default=False) + If True, R-MAT generation occurs across multiple GPUs. If False, only a + single GPU is used. Default is False (single-GPU) + # FIXME: multi GPU RMATs generation not supported yet.
Returns ------- list of cugraph.Graph instances """ - _ensure_args_multi_rmat( - n_edgelists, - min_scale, - max_scale, - edge_factor, - size_distribution, - edge_distribution, - seed, - clip_and_flip, - scramble_vertex_ids, + _ensure_args_rmat( + n_edgelists=n_edgelists, + min_scale=min_scale, + max_scale=max_scale, + edge_factor=edge_factor, + size_distribution=size_distribution, + edge_distribution=edge_distribution, + seed=seed, + include_edge_weights=include_edge_weights, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + dtype=dtype, + include_edge_ids=include_edge_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, + multi_rmat=True, + clip_and_flip=clip_and_flip, + scramble_vertex_ids=scramble_vertex_ids, ) - dfs = rmat_wrapper.generate_rmat_edgelists( + edgelists = pylibcugraph_generate_rmat_edgelists( + ResourceHandle(), + seed, n_edgelists, min_scale, max_scale, edge_factor, size_distribution, edge_distribution, - seed, clip_and_flip, scramble_vertex_ids, + include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + include_edge_ids, + include_edge_types, + min_edge_type_value, + max_edge_type_value, + mg, ) + + dfs = [] + + for edgelist in edgelists: + src, dst, weights, edge_id, edge_type = edgelist + df = cudf.DataFrame() + df["src"] = src + df["dst"] = dst + if weights is not None: + df["weights"] = weights + weights = "weights" + + if edge_id is not None: + df["edge_id"] = edge_id + edge_id = "edge_id" + if edge_type is not None: + df["edge_type"] = edge_type + edge_type = "edge_type" + + dfs.append(df) + list_G = [] for df in dfs: G = cugraph.Graph() - G.from_cudf_edgelist(df, source="src", destination="dst") + G.from_cudf_edgelist( + df, + source="src", + destination="dst", + weight=weights, + edge_id=edge_id, + edge_type=edge_type, + ) list_G.append(G) return list_G diff --git a/python/cugraph/cugraph/generators/rmat_wrapper.pyx 
b/python/cugraph/cugraph/generators/rmat_wrapper.pyx deleted file mode 100644 index 7f1e7f5a219..00000000000 --- a/python/cugraph/cugraph/generators/rmat_wrapper.pyx +++ /dev/null @@ -1,165 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from libc.stdint cimport uintptr_t -from libcpp cimport bool -from libcpp.memory cimport unique_ptr -from libcpp.vector cimport vector -from libcpp.utility cimport move, pair -from cython.operator cimport dereference as deref -import numpy as np - -from rmm._lib.device_buffer cimport device_buffer -import cudf - -from pylibraft.common.handle cimport handle_t -from cugraph.structure.graph_utilities cimport graph_generator_t -from cugraph.generators.rmat cimport (call_generate_rmat_edgelist, - call_generate_rmat_edgelists, - generator_distribution_t, - UNIFORM, - POWER_LAW, - ) -from cugraph.structure.graph_primtypes cimport move_device_buffer_to_column - - -def generate_rmat_edgelist( - scale, - num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids, - handle=None -): - - vertex_t = np.dtype("int32") - if (2**scale) > (2**31 - 1): - vertex_t = np.dtype("int64") - - cdef unique_ptr[handle_t] handle_ptr - cdef size_t handle_size_t - - if handle is None: - handle_ptr.reset(new handle_t()) - handle_ = handle_ptr.get() - else: - handle_size_t = handle.getHandle() - handle_ = handle_size_t - - cdef unique_ptr[graph_generator_t] gg_ret_ptr - - if 
(vertex_t==np.dtype("int32")): - gg_ret_ptr = move(call_generate_rmat_edgelist[int]( deref(handle_), - scale, - num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids)) - else: # (vertex_t == np.dtype("int64")) - gg_ret_ptr = move(call_generate_rmat_edgelist[long]( deref(handle_), - scale, - num_edges, - a, - b, - c, - seed, - clip_and_flip, - scramble_vertex_ids)) - - gg_ret = move(gg_ret_ptr.get()[0]) - - set_source = move_device_buffer_to_column(move(gg_ret.d_source), vertex_t) - set_destination = move_device_buffer_to_column(move(gg_ret.d_destination), vertex_t) - - df = cudf.DataFrame() - df['src'] = set_source - df['dst'] = set_destination - - return df - - -def generate_rmat_edgelists( - n_edgelists, - min_scale, - max_scale, - edge_factor, - size_distribution, - edge_distribution, - seed, - clip_and_flip, - scramble_vertex_ids - ): - - vertex_t = np.dtype("int32") - if (2**max_scale) > (2**31 - 1): - vertex_t = np.dtype("int64") - - cdef unique_ptr[handle_t] handle_ptr - handle_ptr.reset(new handle_t()) - handle_ = handle_ptr.get() - - cdef generator_distribution_t s_distribution - cdef generator_distribution_t e_distribution - if size_distribution == 0: - s_distribution= POWER_LAW - else : - s_distribution= UNIFORM - if edge_distribution == 0: - e_distribution= POWER_LAW - else : - e_distribution= UNIFORM - - cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] gg_ret_ptr - - if (vertex_t==np.dtype("int32")): - gg_ret_ptr = move(call_generate_rmat_edgelists[int]( deref(handle_), - n_edgelists, - min_scale, - max_scale, - edge_factor, - s_distribution, - e_distribution, - seed, - clip_and_flip, - scramble_vertex_ids)) - else: # (vertex_t == np.dtype("int64")) - gg_ret_ptr = move(call_generate_rmat_edgelists[long]( deref(handle_), - n_edgelists, - min_scale, - max_scale, - edge_factor, - s_distribution, - e_distribution, - seed, - clip_and_flip, - scramble_vertex_ids)) - list_df = [] - - for i in range(n_edgelists): - 
set_source = move_device_buffer_to_column(move(gg_ret_ptr[i].first), vertex_t) - set_destination = move_device_buffer_to_column(move(gg_ret_ptr[i].second), vertex_t) - - df = cudf.DataFrame() - df['src'] = set_source - df['dst'] = set_destination - - list_df.append(df) - - #Return a list of dataframes - return list_df diff --git a/python/cugraph/cugraph/structure/graph_utilities.pxd b/python/cugraph/cugraph/structure/graph_utilities.pxd index 0bf0f829d1b..5085aa42216 100644 --- a/python/cugraph/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/cugraph/structure/graph_utilities.pxd @@ -26,12 +26,6 @@ from rmm._lib.device_buffer cimport device_buffer from pylibraft.common.handle cimport handle_t -# C++ graph utilities -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef cppclass graph_generator_t: - unique_ptr[device_buffer] d_source - unique_ptr[device_buffer] d_destination - cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) diff --git a/python/cugraph/cugraph/tests/generators/test_rmat.py b/python/cugraph/cugraph/tests/generators/test_rmat.py index f52190f1576..876e9727b37 100644 --- a/python/cugraph/cugraph/tests/generators/test_rmat.py +++ b/python/cugraph/cugraph/tests/generators/test_rmat.py @@ -18,6 +18,9 @@ from cugraph.generators import rmat import cugraph +from cupy.sparse import coo_matrix, triu, tril +import numpy as np +import cupy as cp ############################################################################## @@ -26,9 +29,48 @@ _scale_test_ids = [f"scale={x}" for x in _scale_values] _graph_types = [cugraph.Graph, None, int] _graph_test_ids = [f"create_using={getattr(x,'__name__',str(x))}" for x in _graph_types] +_clip_and_flip = [False, True] +_clip_and_flip_test_ids = [f"clip_and_flip={x}" for x in _clip_and_flip] +_scramble_vertex_ids = [False, True] +_scramble_vertex_ids_test_ids = [ + f"scramble_vertex_ids={x}" 
for x in _scramble_vertex_ids +] +_include_edge_weights = [False, True] +_include_edge_weights_test_ids = [ + f"include_edge_weights={x}" for x in _include_edge_weights +] +_dtype = [np.float32, cp.float32, None, "FLOAT64", "float32"] +_dtype_test_ids = [f"_dtype={x}" for x in _dtype] +_min_max_weight_values = [[None, None], [0, 1], [2, 5]] +_min_max_weight_values_test_ids = [ + f"min_max_weight_values={x}" for x in _min_max_weight_values +] +_include_edge_ids = [False, True] +_include_edge_ids_test_ids = [f"include_edge_ids={x}" for x in _include_edge_ids] +_include_edge_types = [False, True] +_include_edge_types_test_ids = [f"include_edge_types={x}" for x in _include_edge_types] +_min_max_edge_type_values = [[None, None], [0, 1], [2, 5]] +_min_max_edge_type_values_test_ids = [ + f"min_max_edge_type_values={x}" for x in _min_max_edge_type_values +] -def _call_rmat(scale, num_edges, create_using, mg=False): +def _call_rmat( + scale, + num_edges, + create_using, + clip_and_flip=False, + scramble_vertex_ids=False, + include_edge_weights=False, + dtype=None, + minimum_weight=None, + maximum_weight=None, + include_edge_ids=False, + include_edge_types=False, + min_edge_type_value=None, + max_edge_type_value=None, + mg=False, +): """ Simplifies calling RMAT by requiring only specific args that are varied by these tests and hard-coding all others. 
@@ -40,36 +82,276 @@ def _call_rmat(scale, num_edges, create_using, mg=False): b=0.19, # from Graph500 c=0.19, # from Graph500 seed=24, - clip_and_flip=False, - scramble_vertex_ids=True, + clip_and_flip=clip_and_flip, + scramble_vertex_ids=scramble_vertex_ids, create_using=create_using, + include_edge_weights=include_edge_weights, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + dtype=dtype, + include_edge_ids=include_edge_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, mg=mg, ) ############################################################################### + + +@pytest.mark.sg +@pytest.mark.parametrize( + "include_edge_weights", _include_edge_weights, ids=_include_edge_weights_test_ids +) +@pytest.mark.parametrize("dtype", _dtype, ids=_dtype_test_ids) +@pytest.mark.parametrize( + "min_max_weight", _min_max_weight_values, ids=_min_max_weight_values_test_ids +) +@pytest.mark.parametrize( + "scramble_vertex_ids", _scramble_vertex_ids, ids=_scramble_vertex_ids_test_ids +) +def test_rmat_edge_weights( + include_edge_weights, dtype, min_max_weight, scramble_vertex_ids +): + """ + Verifies that the edge weights returned by rmat() are valid. Also verifies that + valid values are passed to 'dtype', 'minimum_weight' and 'maximum_weight'. 
+ + """ + scale = 2 + num_edges = (2**scale) * 4 + create_using = None # Returns the edgelist from RMAT + minimum_weight, maximum_weight = min_max_weight + + if include_edge_weights: + if ( + minimum_weight is None + or maximum_weight is None + or dtype + not in [ + np.float32, + np.float64, + cp.float32, + cp.float64, + "float32", + "float64", + ] + ): + with pytest.raises(ValueError): + _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_weights=include_edge_weights, + dtype=dtype, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + ) + else: + df = _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_weights=include_edge_weights, + dtype=dtype, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + ) + + # Check that there is a 'weights' column + assert "weights" in df.columns + + edge_weights_err1 = df.query("{} - weights < 0.0001".format(maximum_weight)) + edge_weights_err2 = df.query( + "{} - weights > -0.0001".format(minimum_weight) + ) + + # Check that edge weights values are between 'minimum_weight' + # and 'maximum_weight. 
+ assert len(edge_weights_err1) == 0 + assert len(edge_weights_err2) == 0 + else: + df = _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_weights=include_edge_weights, + dtype=dtype, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + ) + assert len(df.columns) == 2 + + @pytest.mark.sg -@pytest.mark.filterwarnings("ignore:make_current is deprecated:DeprecationWarning") @pytest.mark.parametrize("scale", _scale_values, ids=_scale_test_ids) -def test_rmat_edgelist(scale): +@pytest.mark.parametrize( + "include_edge_ids", _include_edge_ids, ids=_include_edge_ids_test_ids +) +@pytest.mark.parametrize( + "scramble_vertex_ids", _scramble_vertex_ids, ids=_scramble_vertex_ids_test_ids +) +def test_rmat_edge_ids(scale, include_edge_ids, scramble_vertex_ids): """ - Verifies that the edgelist returned by rmat() is valid based on inputs. + Verifies that the edge ids returned by rmat() are valid. + """ + num_edges = (2**scale) * 4 + create_using = None # Returns the edgelist from RMAT + df = _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_ids=include_edge_ids, + ) + + if include_edge_ids: + assert "edge_id" in df.columns + df["index"] = df.index + edge_id_err = df.query("index != edge_id") + assert len(edge_id_err) == 0 + + else: + assert len(df.columns) == 2 + +@pytest.mark.sg +@pytest.mark.parametrize( + "include_edge_types", + _include_edge_types, + ids=_include_edge_types_test_ids, +) +@pytest.mark.parametrize( + "min_max_edge_type_value", + _min_max_edge_type_values, + ids=_min_max_edge_type_values_test_ids, +) +@pytest.mark.parametrize( + "scramble_vertex_ids", _scramble_vertex_ids, ids=_scramble_vertex_ids_test_ids +) +def test_rmat_edge_types( + include_edge_types, min_max_edge_type_value, scramble_vertex_ids +): + """ + Verifies that the edge types returned by rmat() are valid and that valid values + are passed for 'min_edge_type_value' 
and 'max_edge_type_value'. + + """ + scale = 2 + num_edges = (2**scale) * 4 + create_using = None # Returns the edgelist from RMAT + min_edge_type_value, max_edge_type_value = min_max_edge_type_value + + if include_edge_types: + if min_edge_type_value is None or max_edge_type_value is None: + with pytest.raises(ValueError): + _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, + ) + else: + df = _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, + ) + + # Check that there is an 'edge_type' column + assert "edge_type" in df.columns + edge_types_err1 = df.query("{} < edge_type".format(max_edge_type_value)) + edge_types_err2 = df.query("{} > edge_type".format(min_edge_type_value)) + + # Check that edge weights values are between 'min_edge_type_value' + # and 'max_edge_type_value'. + assert len(edge_types_err1) == 0 + assert len(edge_types_err2) == 0 + else: + df = _call_rmat( + scale, + num_edges, + create_using, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_types=include_edge_types, + min_edge_type_value=min_edge_type_value, + max_edge_type_value=max_edge_type_value, + ) + assert len(df.columns) == 2 + + +@pytest.mark.sg +@pytest.mark.parametrize("scale", [2, 4, 8], ids=_scale_test_ids) +@pytest.mark.parametrize( + "include_edge_weights", _include_edge_weights, ids=_include_edge_weights_test_ids +) +@pytest.mark.parametrize("clip_and_flip", _clip_and_flip, ids=_clip_and_flip_test_ids) +def test_rmat_clip_and_flip(scale, include_edge_weights, clip_and_flip): + """ + Verifies that there are edges only in the lower triangular part of + the adjacency matrix when 'clip_and_flip' is set to 'true'. 
+ + Note: 'scramble_vertex_ids' nullifies the effect of 'clip_and_flip' therefore + both flags should not be set to 'True' in order to test the former + + """ num_edges = (2**scale) * 4 create_using = None # Returns the edgelist from RMAT + minimum_weight = 0 + maximum_weight = 1 + dtype = np.float32 + df = _call_rmat( + scale, + num_edges, + create_using, + clip_and_flip=clip_and_flip, + scramble_vertex_ids=False, + include_edge_weights=include_edge_weights, + dtype=dtype, + minimum_weight=minimum_weight, + maximum_weight=maximum_weight, + ) + + if not include_edge_weights: + df["weights"] = 1 + # cupy coo_matrix only support 'float32', 'float64', 'complex64' + # and 'complex128'. + df["weights"] = df["weights"].astype("float32") - df = _call_rmat(scale, num_edges, create_using) - assert len(df) == num_edges + dim = df[["src", "dst"]].max().max() + 1 + src = df["src"].to_cupy() + dst = df["dst"].to_cupy() + weights = df["weights"].to_cupy() + adj_matrix = coo_matrix((weights, (src, dst)), shape=(dim, dim)).toarray() + + upper_coo = triu(adj_matrix) + diag = tril(upper_coo) + + if clip_and_flip: + # Except the diagonal, There should be no edge in the upper triangular part of + # the graph adjacency matrix. + assert diag.nnz == upper_coo.nnz @pytest.mark.sg -@pytest.mark.filterwarnings("ignore:make_current is deprecated:DeprecationWarning") @pytest.mark.parametrize("graph_type", _graph_types, ids=_graph_test_ids) def test_rmat_return_type(graph_type): """ Verifies that the return type returned by rmat() is valid (or the proper exception is raised) based on inputs. 
+ """ scale = 2 num_edges = (2**scale) * 4 diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index 7c50456eb4d..6a09c3de0da 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -27,6 +27,8 @@ set(cython_sources ecg.pyx egonet.pyx eigenvector_centrality.pyx + generate_rmat_edgelist.pyx + generate_rmat_edgelists.pyx graph_properties.pyx graphs.pyx hits.pyx diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index 5c03d8f98cc..c39075ce3fb 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -81,6 +81,10 @@ from pylibcugraph.select_random_vertices import select_random_vertices +from pylibcugraph.generate_rmat_edgelist import generate_rmat_edgelist + +from pylibcugraph.generate_rmat_edgelists import generate_rmat_edgelists + from pylibcugraph import exceptions __version__ = "23.08.00" diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd index 037aadd09cf..f6d62377443 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_generators.pxd @@ -16,6 +16,7 @@ from pylibcugraph._cugraph_c.resource_handle cimport ( cugraph_resource_handle_t, + cugraph_data_type_id_t, bool_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -82,7 +83,7 @@ cdef extern from "cugraph_c/graph_generators.h": ) cdef void \ - cugraph_list_coo_free( + cugraph_coo_list_free( cugraph_coo_list_t* coo_list ) @@ -95,7 +96,8 @@ cdef extern from "cugraph_c/graph_generators.h": double a, double b, double c, - bool clip_and_flip, + bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_t** result, cugraph_error_t** error ) @@ -111,6 +113,7 @@ cdef extern from "cugraph_c/graph_generators.h": 
cugraph_generator_distribution_t size_distribution, cugraph_generator_distribution_t edge_distribution, bool_t clip_and_flip, + bool_t scramble_vertex_ids, cugraph_coo_list_t** result, cugraph_error_t** error ) @@ -139,7 +142,7 @@ cdef extern from "cugraph_c/graph_generators.h": const cugraph_resource_handle_t* handle, cugraph_rng_state_t* rng_state, cugraph_coo_t* coo, - int32_t min_edge_type, - int32_t max_edge_type, + int min_edge_type, + int max_edge_type, cugraph_error_t** error ) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd index 633107a1acb..e9e74723e06 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/resource_handle.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -29,6 +29,8 @@ cdef extern from "cugraph_c/resource_handle.h": FLOAT32 FLOAT64 SIZE_T + + ctypedef data_type_id_t cugraph_data_type_id_t ctypedef int8_t byte_t diff --git a/python/pylibcugraph/pylibcugraph/bfs.pyx b/python/pylibcugraph/pylibcugraph/bfs.pyx index b9d17f15cc5..3034dcc8cb1 100644 --- a/python/pylibcugraph/pylibcugraph/bfs.pyx +++ b/python/pylibcugraph/pylibcugraph/bfs.pyx @@ -28,15 +28,11 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_paths_result_free, ) from pylibcugraph._cugraph_c.array cimport ( - cugraph_type_erased_device_array_view, - cugraph_type_erased_device_array_t, cugraph_type_erased_device_array_view_t, cugraph_type_erased_device_array_view_create, - cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( diff --git a/python/pylibcugraph/pylibcugraph/components/_connectivity.pxd b/python/pylibcugraph/pylibcugraph/components/_connectivity.pxd index 205df45f4a4..b89520709d9 100644 --- a/python/pylibcugraph/pylibcugraph/components/_connectivity.pxd +++ b/python/pylibcugraph/pylibcugraph/components/_connectivity.pxd @@ -17,7 +17,6 @@ # cython: language_level = 3 from pylibcugraph.structure.graph_primtypes cimport * -from pylibcugraph.structure.graph_utilities cimport * cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": @@ -30,10 +29,3 @@ cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": const GraphCSRView[VT,ET,WT] &graph, cugraph_cc_t connect_type, VT *labels) except + - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - cdef void call_wcc[vertex_t, weight_t]( - const handle_t &handle, - const graph_container_t &g, - vertex_t *identifiers) except + - diff --git a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx new 
file mode 100644 index 00000000000..d09d60ff15b --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelist.pyx @@ -0,0 +1,246 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + + +from pylibcugraph._cugraph_c.resource_handle cimport ( + cugraph_resource_handle_t, + bool_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph._cugraph_c.graph_generators cimport ( + cugraph_generate_rmat_edgelist, + cugraph_generate_edge_weights, + cugraph_generate_edge_ids, + cugraph_generate_edge_types, + cugraph_coo_t, + cugraph_coo_get_sources, + cugraph_coo_get_destinations, + cugraph_coo_get_edge_weights, + cugraph_coo_get_edge_id, + cugraph_coo_get_edge_type, + cugraph_coo_free, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.utils cimport ( + assert_success, + copy_to_cupy_array, + get_c_type_from_numpy_type, +) +from pylibcugraph._cugraph_c.random cimport ( + cugraph_rng_state_t +) +from pylibcugraph.random cimport ( + CuGraphRandomState +) + + +def generate_rmat_edgelist(ResourceHandle resource_handle, + random_state, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + bool_t clip_and_flip, + bool_t scramble_vertex_ids, + bool_t 
include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + bool_t include_edge_ids, + bool_t include_edge_types, + min_edge_type_value, + max_edge_type_value, + bool_t multi_gpu, + ): + """ + Generate RMAT edge list + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + random_state : int, optional + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + (See pylibcugraph.random.CuGraphRandomState) + + scale : size_t + Scale factor to set the number of vertices in the graph. Vertex IDs have + values in [0, V), where V = 1 << 'scale' + + num_edges : size_t + Number of edges to generate + + a : double + Probability of the edge being in the first partition + The Graph 500 spec sets this value to 0.57 + + b : double + Probability of the edge being in the second partition + The Graph 500 spec sets this value to 0.19 + + c : double + Probability of the edge being in the third partition + The Graph 500 spec sets this value to 0.19 + + clip_and_flip : bool + Flag controlling whether to generate edges only in the lower triangular + part (including the diagonal) of the graph adjacency matrix + (if set to 'true') or not (if set to 'false'). + + scramble_vertex_ids : bool + Flag controlling whether to scramble vertex ID bits (if set to `true`) + or not (if set to `false`); scrambling vertex ID bits breaks + correlation between vertex ID values and vertex degrees. + + include_edge_weights : bool + Flag controlling whether to generate edges with weights + (if set to 'true') or not (if set to 'false'). 
+ + minimum_weight : double + Minimum weight value to generate (if 'include_edge_weights' is 'true') + + maximum_weight : double + Maximum weight value to generate (if 'include_edge_weights' is 'true') + + dtype : string + The type of weight to generate ("FLOAT32" or "FLOAT64"), ignored unless + 'include_edge_weights' is 'true' + + include_edge_ids : bool + Flag controlling whether to generate edges with ids + (if set to 'true') or not (if set to 'false'). + + include_edge_types : bool + Flag controlling whether to generate edges with types + (if set to 'true') or not (if set to 'false'). + + min_edge_type_value : int + Minimum edge type to generate if 'include_edge_types' is 'true' + otherwise, this parameter is ignored. + + max_edge_type_value : int + Maximum edge type to generate if 'include_edge_types' is 'true' + otherwise, this parameter is ignored. + + multi_gpu : bool + Flag if the COO is being created on multiple GPUs + + + Returns + ------- + return a tuple containing the sources and destinations with their corresponding + weights, ids and types if the flags 'include_edge_weights', 'include_edge_ids' + and 'include_edge_types' are respectively set to 'true' + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + + cdef cugraph_coo_t* result_coo_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + cg_rng_state = CuGraphRandomState(resource_handle, random_state) + + cdef cugraph_rng_state_t* rng_state_ptr = \ + cg_rng_state.rng_state_ptr + + error_code = cugraph_generate_rmat_edgelist(c_resource_handle_ptr, + rng_state_ptr, + scale, + num_edges, + a, + b, + c, + clip_and_flip, + scramble_vertex_ids, + &result_coo_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "generate_rmat_edgelist") + + cdef cugraph_type_erased_device_array_view_t* \ + sources_view_ptr = cugraph_coo_get_sources(result_coo_ptr) + + cdef cugraph_type_erased_device_array_view_t* \ + destinations_view_ptr = 
cugraph_coo_get_destinations(result_coo_ptr) + + cdef cugraph_type_erased_device_array_view_t* edge_weights_view_ptr + + cupy_edge_weights = None + cupy_edge_ids = None + cupy_edge_types = None + + if include_edge_weights: + dtype = get_c_type_from_numpy_type(dtype) + error_code = cugraph_generate_edge_weights(c_resource_handle_ptr, + rng_state_ptr, + result_coo_ptr, + dtype, + minimum_weight, + maximum_weight, + &error_ptr) + assert_success(error_code, error_ptr, "generate_edge_weights") + + edge_weights_view_ptr = cugraph_coo_get_edge_weights(result_coo_ptr) + cupy_edge_weights = copy_to_cupy_array(c_resource_handle_ptr, edge_weights_view_ptr) + + + if include_edge_ids: + error_code = cugraph_generate_edge_ids(c_resource_handle_ptr, + result_coo_ptr, + multi_gpu, + &error_ptr) + + assert_success(error_code, error_ptr, "generate_edge_ids") + + edge_ids_view_ptr = cugraph_coo_get_edge_id(result_coo_ptr) + cupy_edge_ids = copy_to_cupy_array(c_resource_handle_ptr, edge_ids_view_ptr) + + if include_edge_types: + error_code = cugraph_generate_edge_types(c_resource_handle_ptr, + rng_state_ptr, + result_coo_ptr, + min_edge_type_value, + max_edge_type_value, + &error_ptr) + + assert_success(error_code, error_ptr, "generate_edge_types") + + edge_type_view_ptr = cugraph_coo_get_edge_type(result_coo_ptr) + cupy_edge_types = copy_to_cupy_array(c_resource_handle_ptr, edge_type_view_ptr) + + + + + + cupy_sources = copy_to_cupy_array(c_resource_handle_ptr, sources_view_ptr) + cupy_destinations = copy_to_cupy_array(c_resource_handle_ptr, destinations_view_ptr) + + cugraph_coo_free(result_coo_ptr) + + return cupy_sources, cupy_destinations, cupy_edge_weights, cupy_edge_ids, cupy_edge_types diff --git a/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx new file mode 100644 index 00000000000..d5a89f8a222 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/generate_rmat_edgelists.pyx @@ -0,0 +1,273 @@ +# 
Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + + +from pylibcugraph._cugraph_c.resource_handle cimport ( + cugraph_resource_handle_t, + bool_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph._cugraph_c.graph_generators cimport ( + cugraph_generate_rmat_edgelists, + cugraph_generate_edge_weights, + cugraph_generate_edge_ids, + cugraph_generate_edge_types, + cugraph_coo_t, + cugraph_coo_list_t, + cugraph_generator_distribution_t, + cugraph_coo_get_sources, + cugraph_coo_get_destinations, + cugraph_coo_get_edge_weights, + cugraph_coo_get_edge_id, + cugraph_coo_get_edge_type, + cugraph_coo_list_size, + cugraph_coo_list_element, + cugraph_coo_free, + cugraph_coo_list_free, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.utils cimport ( + assert_success, + copy_to_cupy_array, + get_c_type_from_numpy_type, +) +from pylibcugraph._cugraph_c.random cimport ( + cugraph_rng_state_t +) +from pylibcugraph.random cimport ( + CuGraphRandomState +) + + +def generate_rmat_edgelists(ResourceHandle resource_handle, + random_state, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + size_distribution, + edge_distribution, + bool_t clip_and_flip, + bool_t 
scramble_vertex_ids, + bool_t include_edge_weights, + minimum_weight, + maximum_weight, + dtype, + bool_t include_edge_ids, + bool_t include_edge_types, + min_edge_type_value, + max_edge_type_value, + bool_t multi_gpu, + ): + """ + Generate multiple RMAT edge list + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + random_state : int , optional + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + (See pylibcugraph.random.CuGraphRandomState) + + n_edgelists : size_t + Number of edge lists (graphs) to generate + + min_scale : size_t + Scale factor to set the minimum number of vertices in the graph + + max_scale : size_t + Scale factor to set the maximum number of vertices in the graph + + edge_factor : size_t + Average number of edges per vertex to generate + + size_distribution : int + Distribution of the graph sizes, impacts the scale parameter of the + R-MAT generator. + '0' for POWER_LAW distribution and '1' for UNIFORM distribution + + edge_distribution : int + Edges distribution for each graph, impacts how R-MAT parameters + a,b,c,d, are set. + '0' for POWER_LAW distribution and '1' for UNIFORM distribution + + clip_and_flip : bool + Flag controlling whether to generate edges only in the lower triangular + part (including the diagonal) of the graph adjacency matrix + (if set to 'true') or not (if set to 'false') + + scramble_vertex_ids : bool + Flag controlling whether to scramble vertex ID bits (if set to `true`) + or not (if set to `false`); scrambling vertex ID bits breaks + correlation between vertex ID values and vertex degrees. + + include_edge_weights : bool + Flag controlling whether to generate edges with weights + (if set to 'true') or not (if set to 'false'). 
+ + minimum_weight : double + Minimum weight value to generate (if 'include_edge_weights' is 'true') + + maximum_weight : double + Maximum weight value to generate (if 'include_edge_weights' is 'true') + + dtype : string + The type of weight to generate ("FLOAT32" or "FLOAT64"), ignored unless + 'include_edge_weights' is 'true' + + include_edge_ids : bool + Flag controlling whether to generate edges with ids + (if set to 'true') or not (if set to 'false'). + + include_edge_types : bool + Flag controlling whether to generate edges with types + (if set to 'true') or not (if set to 'false'). + + min_edge_type_value : int + Minimum edge type to generate if 'include_edge_types' is 'true' + otherwise, this parameter is ignored. + + max_edge_type_value : int + Maximum edge type to generate if 'include_edge_types' is 'true' + otherwise, this parameter is ignored. + + + Returns + ------- + return a list of tuples containing the sources and destinations with their + corresponding weights, ids and types if the flags 'include_edge_weights', + 'include_edge_ids' and 'include_edge_types' are respectively set to 'true' + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + + cdef cugraph_coo_list_t* result_coo_list_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + cg_rng_state = CuGraphRandomState(resource_handle, random_state) + + cdef cugraph_rng_state_t* rng_state_ptr = \ + cg_rng_state.rng_state_ptr + + cdef cugraph_generator_distribution_t size_distribution_ + cdef cugraph_generator_distribution_t edge_distribution_ + + if size_distribution == 0: + size_distribution_ = cugraph_generator_distribution_t.POWER_LAW + else: + size_distribution_ = cugraph_generator_distribution_t.UNIFORM + + if edge_distribution == 0: + edge_distribution_ = cugraph_generator_distribution_t.POWER_LAW + else: + edge_distribution_ = cugraph_generator_distribution_t.UNIFORM + + error_code = 
cugraph_generate_rmat_edgelists(c_resource_handle_ptr, + rng_state_ptr, + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution_, + edge_distribution_, + clip_and_flip, + scramble_vertex_ids, + &result_coo_list_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "generate_rmat_edgelists") + + cdef size_t size = cugraph_coo_list_size(result_coo_list_ptr) + + cdef cugraph_coo_t* result_coo_ptr + + cdef cugraph_type_erased_device_array_view_t* sources_view_ptr + cdef cugraph_type_erased_device_array_view_t* destinations_view_ptr + + cupy_edge_weights = None + cupy_edge_ids = None + cupy_edge_types = None + + edgelists = [] + + for index in range(size): + result_coo_ptr = cugraph_coo_list_element(result_coo_list_ptr, index) + sources_view_ptr = cugraph_coo_get_sources(result_coo_ptr) + + destinations_view_ptr = cugraph_coo_get_destinations(result_coo_ptr) + cupy_sources = copy_to_cupy_array(c_resource_handle_ptr, sources_view_ptr) + cupy_destinations = copy_to_cupy_array(c_resource_handle_ptr, destinations_view_ptr) + + if include_edge_weights: + dtype = get_c_type_from_numpy_type(dtype) + error_code = cugraph_generate_edge_weights(c_resource_handle_ptr, + rng_state_ptr, + result_coo_ptr, + dtype, + minimum_weight, + maximum_weight, + &error_ptr) + assert_success(error_code, error_ptr, "generate_edge_weights") + + edge_weights_view_ptr = cugraph_coo_get_edge_weights(result_coo_ptr) + cupy_edge_weights = copy_to_cupy_array(c_resource_handle_ptr, edge_weights_view_ptr) + + + + if include_edge_ids: + error_code = cugraph_generate_edge_ids(c_resource_handle_ptr, + result_coo_ptr, + multi_gpu, + &error_ptr) + + assert_success(error_code, error_ptr, "generate_edge_ids") + + edge_ids_view_ptr = cugraph_coo_get_edge_id(result_coo_ptr) + cupy_edge_ids = copy_to_cupy_array(c_resource_handle_ptr, edge_ids_view_ptr) + + if include_edge_types: + error_code = cugraph_generate_edge_types(c_resource_handle_ptr, + rng_state_ptr, + result_coo_ptr, + 
min_edge_type_value, + max_edge_type_value, + &error_ptr) + + assert_success(error_code, error_ptr, "generate_edge_types") + + edge_type_view_ptr = cugraph_coo_get_edge_type(result_coo_ptr) + cupy_edge_types = copy_to_cupy_array(c_resource_handle_ptr, edge_type_view_ptr) + + + edgelists.append((cupy_sources, cupy_destinations, cupy_edge_weights, cupy_edge_ids, cupy_edge_types)) + + # FIXME: Does freeing 'result_coo_ptr' automatically free 'result_coo_list_ptr'? + cugraph_coo_free(result_coo_ptr) + + return edgelists diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index dfbbf09129b..49b9747f0b3 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -14,37 +14,21 @@ # Have cython use python 3 syntax # cython: language_level = 3 -from libc.stdint cimport uintptr_t - -from pylibcugraph._cugraph_c.resource_handle cimport ( - bool_t, - cugraph_resource_handle_t, - data_type_id_t, -) from pylibcugraph._cugraph_c.error cimport ( cugraph_error_code_t, cugraph_error_t, ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.graph cimport ( - cugraph_graph_t, cugraph_sg_graph_create, cugraph_mg_graph_create, cugraph_sg_graph_create_from_csr, - cugraph_graph_properties_t, cugraph_sg_graph_free, cugraph_mg_graph_free, ) -from pylibcugraph._cugraph_c.graph cimport ( - cugraph_graph_t, - cugraph_mg_graph_create, - cugraph_graph_properties_t, - cugraph_mg_graph_free, -) from pylibcugraph.resource_handle cimport ( ResourceHandle, ) @@ -54,7 +38,6 @@ from pylibcugraph.graph_properties cimport ( from pylibcugraph.utils cimport ( assert_success, assert_CAI_type, - get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj, ) diff --git a/python/pylibcugraph/pylibcugraph/hits.pyx 
b/python/pylibcugraph/pylibcugraph/hits.pyx index 7c472f54866..4524a4f70df 100644 --- a/python/pylibcugraph/pylibcugraph/hits.pyx +++ b/python/pylibcugraph/pylibcugraph/hits.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx index 0f08e690f92..fc78ca89e87 100644 --- a/python/pylibcugraph/pylibcugraph/katz_centrality.pyx +++ b/python/pylibcugraph/pylibcugraph/katz_centrality.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,6 @@ from libc.stdint cimport uintptr_t from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -49,7 +48,6 @@ from pylibcugraph.graphs cimport ( from pylibcugraph.utils cimport ( assert_success, copy_to_cupy_array, - assert_CAI_type, get_c_type_from_numpy_type, ) diff --git a/python/pylibcugraph/pylibcugraph/node2vec.pyx b/python/pylibcugraph/pylibcugraph/node2vec.pyx index a550070e7a7..d0ab3f22b00 100644 --- a/python/pylibcugraph/pylibcugraph/node2vec.pyx +++ b/python/pylibcugraph/pylibcugraph/node2vec.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,7 +18,6 @@ from libc.stdint cimport uintptr_t from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( diff --git a/python/pylibcugraph/pylibcugraph/pagerank.pyx b/python/pylibcugraph/pylibcugraph/pagerank.pyx index a5022072b4c..f831d844338 100644 --- a/python/pylibcugraph/pylibcugraph/pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/pagerank.pyx @@ -14,11 +14,8 @@ # Have cython use python 3 syntax # cython: language_level = 3 -from libc.stdint cimport uintptr_t - from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -27,7 +24,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_free ) from pylibcugraph._cugraph_c.graph cimport ( @@ -49,9 +45,7 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, - get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj, ) from pylibcugraph.exceptions import FailedToConvergeError diff --git a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx index e60e7fa2c3e..79ef80be549 100644 --- a/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx +++ b/python/pylibcugraph/pylibcugraph/personalized_pagerank.pyx @@ -14,11 +14,8 @@ # Have cython use python 3 syntax # cython: language_level = 3 -from libc.stdint cimport uintptr_t - from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -27,7 +24,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) 
from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_free ) from pylibcugraph._cugraph_c.graph cimport ( @@ -49,9 +45,7 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, - get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj, ) from pylibcugraph.exceptions import FailedToConvergeError diff --git a/python/pylibcugraph/pylibcugraph/structure/graph_utilities.pxd b/python/pylibcugraph/pylibcugraph/structure/graph_utilities.pxd deleted file mode 100644 index d9532cd4190..00000000000 --- a/python/pylibcugraph/pylibcugraph/structure/graph_utilities.pxd +++ /dev/null @@ -1,53 +0,0 @@ -# Copyright (c) 2021-2022, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - - -from pylibraft.common.handle cimport * -from libcpp cimport bool - - -cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": - - ctypedef enum numberTypeEnum: - int32Type "cugraph::cython::numberTypeEnum::int32Type" - int64Type "cugraph::cython::numberTypeEnum::int64Type" - floatType "cugraph::cython::numberTypeEnum::floatType" - doubleType "cugraph::cython::numberTypeEnum::doubleType" - - cdef cppclass graph_container_t: - pass - - cdef void populate_graph_container( - graph_container_t &graph_container, - handle_t &handle, - void *src_vertices, - void *dst_vertices, - void *weights, - void *vertex_partition_offsets, - void *segment_offsets, - size_t num_segments, - numberTypeEnum vertexType, - numberTypeEnum edgeType, - numberTypeEnum weightType, - size_t num_local_edges, - size_t num_global_vertices, - size_t num_global_edges, - bool is_weighted, - bool is_symmetric, - bool transposed, - bool multi_gpu) except + diff --git a/python/pylibcugraph/pylibcugraph/tests/test_rmat.py b/python/pylibcugraph/pylibcugraph/tests/test_rmat.py new file mode 100644 index 00000000000..b57cb12aa28 --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/tests/test_rmat.py @@ -0,0 +1,99 @@ +# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import pytest +import cupy as cp + +from pylibcugraph import ( + ResourceHandle, +) +from pylibcugraph import generate_rmat_edgelist + +# ============================================================================= +# Pytest fixtures +# ============================================================================= +# fixtures used in this test module are defined in conftest.py + + +# ============================================================================= +# Tests +# ============================================================================= + + +def check_results( + result, scale, num_edges, include_edge_ids, include_edge_weights, include_edge_types +): + + h_src_arr, h_dst_arr, h_wgt_arr, h_ids_arr, h_types_arr = result + + if include_edge_weights: + assert h_wgt_arr is not None + if include_edge_ids: + assert h_ids_arr is not None + if include_edge_types: + assert h_types_arr is not None + + vertices = cp.union1d(h_src_arr, h_dst_arr) + assert len(h_src_arr) == len(h_dst_arr) == num_edges + assert len(vertices) <= 2**scale + + +# TODO: Coverage for the MG implementation +@pytest.mark.parametrize("scale", [2, 4, 8]) +@pytest.mark.parametrize("num_edges", [4, 16, 32]) +@pytest.mark.parametrize("clip_and_flip", [False, True]) +@pytest.mark.parametrize("scramble_vertex_ids", [False, True]) +@pytest.mark.parametrize("include_edge_weights", [False, True]) +@pytest.mark.parametrize("include_edge_types", [False, True]) +@pytest.mark.parametrize("include_edge_ids", [False, True]) +def test_rmat( + scale, + num_edges, + clip_and_flip, + scramble_vertex_ids, + include_edge_weights, + include_edge_types, + include_edge_ids, +): + + resource_handle = ResourceHandle() + + result = generate_rmat_edgelist( + resource_handle=resource_handle, + random_state=42, + scale=scale, + num_edges=num_edges, + a=0.57, + b=0.19, + c=0.19, + clip_and_flip=clip_and_flip, + scramble_vertex_ids=scramble_vertex_ids, + include_edge_weights=include_edge_weights, + minimum_weight=0, + 
maximum_weight=1, + dtype=cp.float32, + include_edge_ids=include_edge_ids, + include_edge_types=include_edge_types, + min_edge_type_value=2, + max_edge_type_value=5, + multi_gpu=False, + ) + check_results( + result, + scale, + num_edges, + include_edge_ids, + include_edge_weights, + include_edge_types, + ) diff --git a/python/pylibcugraph/pylibcugraph/triangle_count.pyx b/python/pylibcugraph/pylibcugraph/triangle_count.pyx index e26b2a291cf..fd86181b581 100644 --- a/python/pylibcugraph/pylibcugraph/triangle_count.pyx +++ b/python/pylibcugraph/pylibcugraph/triangle_count.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,6 @@ from libc.stdint cimport uintptr_t from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( diff --git a/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx b/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx index 649f7980747..3989e45d48f 100644 --- a/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx +++ b/python/pylibcugraph/pylibcugraph/two_hop_neighbors.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -25,7 +25,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_free, ) from pylibcugraph._cugraph_c.graph_functions cimport ( @@ -46,9 +45,7 @@ from pylibcugraph.graphs cimport ( ) from pylibcugraph.utils cimport ( assert_success, - assert_CAI_type, copy_to_cupy_array, - get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj ) diff --git a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx index a1832948f28..99519ab04f7 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/uniform_neighbor_sample.pyx @@ -18,7 +18,6 @@ from libc.stdint cimport uintptr_t from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -38,11 +37,6 @@ from pylibcugraph._cugraph_c.graph cimport ( ) from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sample_result_t, - cugraph_sample_result_get_sources, - cugraph_sample_result_get_destinations, - cugraph_sample_result_get_index, - cugraph_sample_result_free, - cugraph_prior_sources_behavior_t, cugraph_sampling_options_t, cugraph_sampling_options_create, @@ -62,11 +56,9 @@ from pylibcugraph.resource_handle cimport ( ) from pylibcugraph.graphs cimport ( _GPUGraph, - MGGraph, ) from pylibcugraph.utils cimport ( assert_success, - copy_to_cupy_array, assert_CAI_type, assert_AI_type, get_c_type_from_numpy_type, diff --git a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx index 4883fc9e6b1..4a2b8a70189 100644 --- a/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx +++ 
b/python/pylibcugraph/pylibcugraph/uniform_random_walks.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,8 +17,6 @@ from libc.stdint cimport uintptr_t from pylibcugraph._cugraph_c.resource_handle cimport ( - bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -29,9 +27,6 @@ from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_free, - cugraph_type_erased_host_array_view_t, - cugraph_type_erased_host_array_view_create, - cugraph_type_erased_host_array_view_free, ) from pylibcugraph._cugraph_c.graph cimport ( cugraph_graph_t, @@ -41,7 +36,6 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_random_walk_result_t, cugraph_random_walk_result_get_paths, cugraph_random_walk_result_get_weights, - cugraph_random_walk_result_get_path_sizes, cugraph_random_walk_result_get_max_path_length, cugraph_random_walk_result_free, ) @@ -50,13 +44,11 @@ from pylibcugraph.resource_handle cimport ( ) from pylibcugraph.graphs cimport ( _GPUGraph, - MGGraph, ) from pylibcugraph.utils cimport ( assert_success, copy_to_cupy_array, assert_CAI_type, - assert_AI_type, get_c_type_from_numpy_type, ) diff --git a/python/pylibcugraph/pylibcugraph/utils.pyx b/python/pylibcugraph/pylibcugraph/utils.pyx index a9fc8fce711..70bef89f4cf 100644 --- a/python/pylibcugraph/pylibcugraph/utils.pyx +++ b/python/pylibcugraph/pylibcugraph/utils.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -124,13 +124,14 @@ cdef get_numpy_type_from_c_type(data_type_id_t c_type): cdef get_c_type_from_numpy_type(numpy_type): - if numpy_type == numpy.int32: + dt = numpy.dtype(numpy_type) + if dt == numpy.int32: return data_type_id_t.INT32 - elif numpy_type == numpy.int64: + elif dt == numpy.int64: return data_type_id_t.INT64 - elif numpy_type == numpy.float32: + elif dt == numpy.float32: return data_type_id_t.FLOAT32 - elif numpy_type == numpy.float64: + elif dt == numpy.float64: return data_type_id_t.FLOAT64 else: raise RuntimeError("Internal error: got invalid data type enum value " diff --git a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx index abd78aa8c10..7cc0d8ab4c1 100644 --- a/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx +++ b/python/pylibcugraph/pylibcugraph/weakly_connected_components.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -14,12 +14,10 @@ # Have cython use python 3 syntax # cython: language_level = 3 -from libc.stdint cimport uintptr_t from pylibcugraph import GraphProperties, SGGraph from pylibcugraph._cugraph_c.resource_handle cimport ( bool_t, - data_type_id_t, cugraph_resource_handle_t, ) from pylibcugraph._cugraph_c.error cimport ( @@ -28,7 +26,6 @@ from pylibcugraph._cugraph_c.error cimport ( ) from pylibcugraph._cugraph_c.array cimport ( cugraph_type_erased_device_array_view_t, - cugraph_type_erased_device_array_view_create, cugraph_type_erased_device_array_view_copy, ) from pylibcugraph._cugraph_c.graph cimport ( @@ -51,7 +48,6 @@ from pylibcugraph.utils cimport ( assert_success, assert_CAI_type, copy_to_cupy_array, - get_c_type_from_numpy_type, create_cugraph_type_erased_device_array_view_from_py_obj, )