From cafda937b683466484d0a6f0c3b721321cc98f59 Mon Sep 17 00:00:00 2001 From: bkj Date: Fri, 20 Nov 2020 17:08:50 +0000 Subject: [PATCH] Squashed commit of the following: commit 8db053353adb6ddde7eeec0e77b03cf41600da83 Author: bkj Date: Fri Nov 20 17:01:36 2020 +0000 fixing param names; removing generic; renaming apps commit 9533cdc9e1ae619a878f7952701bfa67a9015524 Merge: 5dc2019 de2606f Author: bkj Date: Wed Nov 11 20:10:21 2020 +0000 merging shared pointer commit 5dc2019cbe15cd531b254d647a9729467a462431 Merge: 039c8d3 f37ad07 Author: bkj Date: Mon Nov 9 18:57:22 2020 +0000 Merge branch 'bkj/api4' into bkj/api_1027 commit 039c8d3c43da1555d6ed3f6e46457805717d24b0 Merge: d439e90 4d32db8 Author: bkj Date: Mon Nov 9 18:54:30 2020 +0000 Merge branch 'master' into bkj/api_1027 commit d439e90c580c17a5158140bfad257780dc69b442 Merge: c07a033 8d4ca0e Author: bkj Date: Mon Nov 9 18:46:39 2020 +0000 Merge branch 'master' into bkj/api_1027 commit f37ad07ebf12b32508bab6cd082f16ad23d4bd36 Author: bkj Date: Wed Nov 4 20:56:55 2020 +0000 allow graph constructors to be called w/ raw pointers (as in eg cugraph) commit c07a033a963630909bb134ad84e86dc388011b2f Author: bkj Date: Mon Nov 2 17:59:23 2020 +0000 initialization commit 63899956f5c4e0608876ff55ed3f5756c7930fe0 Author: bkj Date: Mon Nov 2 17:41:10 2020 +0000 initializers commit 9e86f5d275a826e651c56c4290f6c500318e0477 Merge: 20dd0fd 771eb98 Author: bkj Date: Mon Nov 2 17:35:14 2020 +0000 Merge branch 'bkj/api4' into bkj/api_1027 commit 771eb983b945652448889f5f7d7ed380f5c3393a Merge: 5e39791 2aa9447 Author: bkj Date: Mon Nov 2 17:33:58 2020 +0000 Merge branch 'master' into bkj/api4 commit 20dd0fd0b27ed093801ffb33fa9712364af58465 Author: bkj Date: Wed Oct 28 19:45:43 2020 +0000 typo commit 90bc820656ac6378dd204bec5c69002aa8a898b8 Author: bkj Date: Wed Oct 28 19:38:21 2020 +0000 adding generic runner commit 337d11ea2dbf6e62d0f3fb953051c73b3a0cb02b Author: bkj Date: Wed Oct 28 19:23:19 2020 +0000 further API cleanup commit e8c867dead937cf50aa48f8cc223479e5d819718 Author: bkj Date: Wed Oct 28 18:43:03 2020 +0000 updating color implementation commit 5eb1091d431ea9cdaae8f436f4edba945fd5b004 Merge: a39455e 08918f2 Author: bkj Date: Wed Oct 28 18:34:50 2020 +0000 Merge branch 'master' into bkj/api_1027 commit a39455eec7333004b53f7710087709e368586d78 Author: bkj Date: Wed Oct 28 18:33:23 2020 +0000 color in new format commit 839baae0ae1c5a131d3051df9675ca023d9a4c32 Author: bkj Date: Wed Oct 28 18:02:51 2020 +0000 ... commit 746b115fe5990ce93fc48307e8d75404760081ba Author: bkj Date: Tue Oct 27 23:56:00 2020 +0000 refactoring for external calls commit 5e397910963121918ae34cdccf14841a471aa19c Author: bkj Date: Tue Oct 27 23:11:45 2020 +0000 make meta and graph simultaneously commit e706ee0f1d4f900e699496b5bbc9a7ed1a6d604b Author: bkj Date: Tue Oct 27 22:14:45 2020 +0000 user allocate results; no access to visited commit be96e18687a8dba4b12acb661cba699c0aa68b41 Merge: 7fbbaf0 0bb12c2 Author: bkj Date: Mon Oct 26 15:59:00 2020 +0000 Merge branch 'master' into bkj/api4 commit 7fbbaf0cf89c4c3314e1199d3e4803e6ea3f1364 Author: bkj Date: Thu Oct 22 04:11:04 2020 +0000 ... commit a11dbbaa9661c6f75e1738331d719fa5a816b89c Author: bkj Date: Tue Oct 20 22:28:53 2020 +0000 move visited initialization from result to problem commit 9cf865bd556a67ff924315908c98017b2b7a3814 Author: bkj Date: Tue Oct 20 22:14:49 2020 +0000 testing commit c15ca21729f25f33ae4e04c97109818f65b6307e Author: bkj Date: Tue Oct 20 22:07:48 2020 +0000 merge master commit 6720071cdc6150bfa28c0925a86a8b7e3ec18f04 Merge: 40d8dd6 1493056 Author: bkj Date: Tue Oct 20 22:06:36 2020 +0000 Merge branch 'master' into bkj/api5 commit 40d8dd6a28033abbcde9622a69e6324e56623e5a Author: bkj Date: Tue Oct 20 22:04:06 2020 +0000 api commit e42aef14408596413fc8b122e7e32e06d4d26a15 Author: bkj Date: Tue Oct 20 22:02:32 2020 +0000 cleaning up API commit a27067d523d0cbb0882daad7c9ee6be599a52a92 Author: bkj Date: Tue Oct 20 19:47:12 2020 +0000 testing commit 86f66ccb5deac08479c387c49cc64aeac7c19f14 Merge: 967fb35 d319696 Author: bkj Date: Tue Oct 20 19:24:33 2020 +0000 merging changed commit d319696ce593a8767c22893a42b130146ebd163c Author: bkj Date: Fri Oct 16 17:00:14 2020 +0000 docs commit 6abc90141a972d7b2a3ab2b620fb7ef97544c332 Author: bkj Date: Fri Oct 16 16:57:41 2020 +0000 refactoring runner to be application agnostic --- examples/Makefile.inc | 28 +-- examples/color/color.cu | 104 ++++----- examples/sssp/sssp.cu | 70 +++--- gunrock/applications/color.hxx | 182 ++++++++++++++++ gunrock/applications/color/color.hxx | 86 -------- .../color/color_implementation.hxx | 169 --------------- gunrock/applications/sssp.hxx | 200 ++++++++++++++++++ gunrock/applications/sssp/sssp.hxx | 78 ------- .../applications/sssp/sssp_implementation.hxx | 172 --------------- gunrock/formats/csr.hxx | 2 +- gunrock/framework/enactor.hxx | 4 +- gunrock/framework/problem.hxx | 45 ++-- gunrock/graph/build.hxx | 115 +++++----- 13 files changed, 580 insertions(+), 675 deletions(-) create mode 100644 gunrock/applications/color.hxx delete mode 100644 gunrock/applications/color/color.hxx delete mode 100644 gunrock/applications/color/color_implementation.hxx create mode 100644 gunrock/applications/sssp.hxx delete mode 100644 gunrock/applications/sssp/sssp.hxx delete mode 100644 gunrock/applications/sssp/sssp_implementation.hxx diff --git a/examples/Makefile.inc b/examples/Makefile.inc index 979192cc..89c62cb5 100644 --- a/examples/Makefile.inc +++ b/examples/Makefile.inc @@ -28,25 +28,25 @@ SM_TARGETS = $(GEN_SM61) #------------------------------------------------------------------------------- ifeq (DARWIN, $(findstring DARWIN, $(OSUPPER))) - OMP_INC = -I"/usr/local/include/libiomp" - OMP_LINK = -Xlinker /usr/local/lib/libiomp5.dylib + OMP_INC = -I"/usr/local/include/libiomp" + OMP_LINK = -Xlinker /usr/local/lib/libiomp5.dylib else - OMP_INC = - OMP_LINK = -Xcompiler -fopenmp -Xlinker -lgomp + OMP_INC = + OMP_LINK = -Xcompiler -fopenmp -Xlinker -lgomp endif -EXT_INC = ../../externals -CUDA_INC = -I"$(shell dirname $(NVCC))/../include" -MGPU_INC = -I"$(EXT_INC)/moderngpu/src" -JSON_INC = -I"$(EXT_INC)/rapidjson/include" -MTX_INC = -I"$(EXT_INC)/mtx" +EXT_INC = ../../externals +CUDA_INC = -I"$(shell dirname $(NVCC))/../include" +MGPU_INC = -I"$(EXT_INC)/moderngpu/src" +JSON_INC = -I"$(EXT_INC)/rapidjson/include" +MTX_INC = -I"$(EXT_INC)/mtx" -CUB = -lcub -CUSPARSE = -lcusparse +CUB = -lcub +CUSPARSE = -lcusparse -GUNROCK_DEF = -Xcompiler -DGUNROCKVERSION=2.0.0 -LINK = $(BOOST_LINK) $(OMP_LINK) $(METIS_LINK) $(GUNROCK_DEF) -INC = -I.. -I../.. $(CUDA_CPP) $(OMP_INC) $(MGPU_INC) $(CUB_INC) $(JSON_INC) $(CUDA_INC) $(MTX_INC) $(LINK) +GUNROCK_DEF = -Xcompiler -DGUNROCKVERSION=2.0.0 +LINK = $(BOOST_LINK) $(OMP_LINK) $(METIS_LINK) $(GUNROCK_DEF) +INC = -I.. -I../.. $(CUDA_CPP) $(OMP_INC) $(MGPU_INC) $(CUB_INC) $(JSON_INC) $(CUDA_INC) $(MTX_INC) $(LINK) #------------------------------------------------------------------------------- # Defines diff --git a/examples/color/color.cu b/examples/color/color.cu index d4b7351f..9d276724 100644 --- a/examples/color/color.cu +++ b/examples/color/color.cu @@ -1,80 +1,64 @@ #include // EXIT_SUCCESS #include -#include +#include using namespace gunrock; - -/** - * @brief Count unique number of colors used to color the graph. - * - * @tparam T - * @param v - * @return std::size_t - */ -template -std::size_t unique_colors(const thrust::host_vector& v) { - std::size_t num_unique_elements = 0; - std::unordered_set set; - - for (const auto& elem : v) { - if (set.find(elem) == set.end()) { - set.insert(elem); - ++num_unique_elements; - } - } - - return num_unique_elements; -} +using namespace memory; void test_color(int num_arguments, char** argument_array) { - using vertex_t = int; - using edge_t = int; - using weight_t = float; - - constexpr memory::memory_space_t space = memory::memory_space_t::device; - + if (num_arguments != 2) { - std::cerr << "usage: ./bin/color filename.mtx" << std::endl; + std::cerr << "usage: ./bin/ filename.mtx" << std::endl; exit(1); } - - // Load Matrix-Market file & convert the resultant COO into CSR format. + + // -- + // Define types + + using vertex_t = int; + using edge_t = int; + using weight_t = float; + + // -- + // IO + std::string filename = argument_array[1]; + io::matrix_market_t mm; - auto coo = mm.load(filename); - format::csr_t csr; - csr = coo; - - // Move data to device. - thrust::device_vector d_Ap = csr.row_offsets; - thrust::device_vector d_Aj = csr.column_indices; - thrust::device_vector d_Ax = csr.nonzero_values; + format::csr_t csr; + csr.from_coo(mm.load(filename)); - thrust::device_vector d_colors(csr.number_of_rows); - - // calling color - float elapsed = - color::execute(csr.number_of_rows, // number of vertices - csr.number_of_columns, // number of columns - csr.number_of_nonzeros, // number of edges - d_Ap, // row_offsets - d_Aj, // column_indices - d_Ax, // nonzero values - d_colors // output colors - ); - - thrust::host_vector colors = d_colors; - std::cout << "Number of Colors: " << unique_colors(colors) << std::endl; - std::cout << "Colors (output) = "; - thrust::copy(d_colors.begin(), d_colors.end(), - std::ostream_iterator(std::cout, " ")); + // -- + // Build graph + metadata + + auto [G, meta] = graph::build::from_csr_t(&csr); + + // -- + // Params and memory allocation + + vertex_t n_vertices = meta[0].get_number_of_vertices(); + thrust::device_vector colors(n_vertices); + + // -- + // Run problem + + float elapsed = gunrock::color::run( + G, + meta, + colors.data().get() + ); + + // -- + // Log + + std::cout << "Distances (output) = "; + thrust::copy(colors.begin(), colors.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; - std::cout << "color Elapsed Time: " << elapsed << " (ms)" << std::endl; } int main(int argc, char** argv) { test_color(argc, argv); return EXIT_SUCCESS; -} \ No newline at end of file +} diff --git a/examples/sssp/sssp.cu b/examples/sssp/sssp.cu index 064560e9..27d5ae63 100644 --- a/examples/sssp/sssp.cu +++ b/examples/sssp/sssp.cu @@ -1,44 +1,64 @@ #include // EXIT_SUCCESS -#include +#include using namespace gunrock; using namespace memory; void test_sssp(int num_arguments, char** argument_array) { - using vertex_t = int; - using edge_t = int; - using weight_t = float; - + if (num_arguments != 2) { - std::cerr << "usage: ./bin/color filename.mtx" << std::endl; + std::cerr << "usage: ./bin/ filename.mtx" << std::endl; exit(1); } - - // Load Matrix-Market file & convert the resultant COO into CSR format. + + // -- + // Define types + + using vertex_t = int; + using edge_t = int; + using weight_t = float; + + // -- + // IO + std::string filename = argument_array[1]; + io::matrix_market_t mm; - auto coo = mm.load(filename); - - // convert coo to csr - format::csr_t csr; - csr = coo; // Able to convert host-based coo_t to device-based csr (or host - // to host). As of right now, it requires coo to be host side. - - vertex_t source = 0; - thrust::device_vector d_distances(csr.number_of_rows); + format::csr_t csr; + csr.from_coo(mm.load(filename)); - // calling sssp - float elapsed = sssp::execute(csr, // device csr_t sparse data - source, // single source - d_distances // output distances + // -- + // Build graph + metadata + + auto [G, meta] = graph::build::from_csr_t(&csr); + + // -- + // Params and memory allocation + + vertex_t single_source = 0; + + vertex_t n_vertices = meta->get_number_of_vertices(); + thrust::device_vector distances(n_vertices); + thrust::device_vector predecessors(n_vertices); + + // -- + // Run problem + + float elapsed = gunrock::sssp::run( + G, + meta, + single_source, + distances.data().get(), + predecessors.data().get() ); - + + // -- + // Log + std::cout << "Distances (output) = "; - thrust::copy(d_distances.begin(), d_distances.end(), - std::ostream_iterator(std::cout, " ")); + thrust::copy(distances.begin(), distances.end(), std::ostream_iterator(std::cout, " ")); std::cout << std::endl; - std::cout << "SSSP Elapsed Time: " << elapsed << " (ms)" << std::endl; } diff --git a/gunrock/applications/color.hxx b/gunrock/applications/color.hxx new file mode 100644 index 00000000..0ee1f81b --- /dev/null +++ b/gunrock/applications/color.hxx @@ -0,0 +1,182 @@ +#pragma once + +#include +#include + +#include +#include + +namespace gunrock { +namespace color { + +template +struct param_t { + // No parameters for this algorithm +}; + +template +struct result_t { + using vertex_t = typename meta_t::vertex_type; + + vertex_t* colors; + result_t(vertex_t* colors_) { + colors = colors_; + } +}; + +template +struct problem_t : gunrock::problem_t { + // Use Base class constructor -- does this work? does it handle copy constructor? + using gunrock::problem_t::problem_t; + + using vertex_t = typename meta_t::vertex_type; + using edge_t = typename meta_t::edge_type; + using weight_t = typename meta_t::weight_type; + + thrust::device_vector randoms; + + void reset() { + + // XXX: Ugly. Initialize d_colors to be all INVALIDs. + auto n_vertices = this->get_meta_pointer()->get_number_of_vertices(); + auto d_colors = thrust::device_pointer_cast(this->result->colors); + thrust::fill( + thrust::device, + d_colors + 0, + d_colors + n_vertices, + std::numeric_limits::max() + ); + + // Generate random numbers. + randoms.resize(n_vertices); + algo::generate::random::uniform_distribution(0, n_vertices, randoms.begin()); + + } +}; + +template +struct enactor_t : gunrock::enactor_t { + using gunrock::enactor_t::enactor_t; + + using vertex_t = typename problem_t::vertex_t; + using edge_t = typename problem_t::edge_t; + using weight_t = typename problem_t::weight_t; + + // + void prepare_frontier(cuda::standard_context_t* context) override { + auto E = this->get_enactor(); // Enactor pointer + auto P = E->get_problem_pointer(); // Problem pointer + auto meta = P->get_meta_pointer(); // metadata pointer + auto f = E->get_active_frontier_buffer(); // active frontier + + // XXX: Find a better way to initialize the frontier to all nodes + for (vertex_t v = 0; v < meta->get_number_of_vertices(); ++v) + f->push_back(v); + } + + void loop(cuda::standard_context_t* context) override { + // Data slice + auto E = this->get_enactor(); + auto P = E->get_problem_pointer(); + auto G = P->get_graph_pointer(); + + auto colors = P->result->colors; + auto randoms = P->randoms.data().get(); + auto iteration = E->iteration; + + auto color_me_in = [G, colors, randoms, iteration] __host__ __device__( + vertex_t const& vertex) -> bool { + // If invalid vertex, exit early. + if (vertex == std::numeric_limits::max()) + return false; + + edge_t start_edge = G->get_starting_edge(vertex); + edge_t num_neighbors = G->get_number_of_neighbors(vertex); + + bool colormax = true; + bool colormin = true; + + // Color two nodes at the same time. + int color = iteration * 2; + + // Main loop that goes over all the neighbors and finds the maximum or + // minimum random number vertex. + for (edge_t e = start_edge; e < start_edge + num_neighbors; ++e) { + vertex_t u = G->get_destination_vertex(e); + + if ((colors[u] != std::numeric_limits::max()) && + (colors[u] != color + 1) && (colors[u] != color + 2) || + (vertex == u)) + continue; + if (randoms[vertex] <= randoms[u]) + colormax = false; + if (randoms[vertex] >= randoms[u]) + colormin = false; + } + + // Color if the node has the maximum OR minimum random number, this way, + // per iteration we can possibly fill 2 colors at the same time. + if (colormax) { + colors[vertex] = color + 1; + return false; // remove (colored). + } else if (colormin) { + colors[vertex] = color + 2; + return false; // remove (colored). + } else { + return true; // keep (not colored). + } + }; + + // Execute filter operator on the provided lambda. + operators::filter::execute( + G, E, color_me_in, context); + } + // +}; // struct enactor_t + +// !! This should go somewhere else -- @neoblizz, where? +auto get_default_context() { + std::vector devices; + devices.push_back(0); + + return std::shared_ptr( + new cuda::multi_context_t(devices)); +} + +template < + typename graph_vector_t, + typename meta_vector_t, + typename graph_t = typename graph_vector_t::value_type, + typename meta_t = typename meta_vector_t::value_type> +float run( + graph_vector_t& G, + meta_vector_t& meta, + typename meta_t::vertex_type* colors // Output +) { + + // + param_t param; + result_t result(colors); + // + // + auto multi_context = get_default_context(); + + using problem_type = problem_t, result_t>; + using enactor_type = enactor_t; + + problem_type problem( + G.data().get(), // input graph (GPU) + meta.data(), // metadata (CPU) + ¶m, // input parameters + &result, // output results + multi_context // input context + ); + problem.reset(); + + enactor_type enactor(&problem, multi_context); + return enactor.enact(); + // +} + +} // namespace color +} // namespace gunrock \ No newline at end of file diff --git a/gunrock/applications/color/color.hxx b/gunrock/applications/color/color.hxx deleted file mode 100644 index cc6dbf44..00000000 --- a/gunrock/applications/color/color.hxx +++ /dev/null @@ -1,86 +0,0 @@ -/** - * @file color.hxx - * @author Muhammad Osama (mosama@ucdavis.edu) - * @brief Single-Source Shortest Path graph algorithm. - * @version 0.1 - * @date 2020-10-05 - * - * @copyright Copyright (c) 2020 - * - */ - -#include - -#pragma once - -namespace gunrock { -namespace color { - -using namespace memory; - -template -float color(graph_type* G, - host_graph_type* g, - typename graph_type::vertex_pointer_t colors) { - using color_problem_type = color_problem_t; - using color_enactor_type = color_enactor_t; - - // Create contexts for all the devices - std::vector devices; - devices.push_back(0); - - auto multi_context = std::shared_ptr( - new cuda::multi_context_t(devices)); - - color_problem_type color_problem(G, // input graph (GPU) - g, // input graph (CPU) - multi_context, // input context - colors // output color/vertex - ); - - cudaDeviceSynchronize(); - error::throw_if_exception(cudaPeekAtLastError()); - - color_enactor_type color_enactor( - &color_problem, // pass in a problem (contains data in/out) - multi_context); - - float elapsed = color_enactor.enact(); - return elapsed; -} - -template -float execute(vertex_t const& number_of_rows, - vertex_t const& number_of_columns, - edge_t const& number_of_nonzeros, - edge_vector_t& row_offsets, - vertex_vector_t& column_indices, - weight_vector_t& edge_values, - vertex_vector_t& colors) { - // Build graph structure for color - auto G = - graph::build::from_csr_t(number_of_rows, // number of rows - number_of_columns, // number of columns - number_of_nonzeros, // number of edges - row_offsets, // row offsets - column_indices, // column indices - edge_values); // nonzero values - - auto g = graph::build::from_csr_t( - number_of_rows, // number of rows - number_of_columns, // number of columns - number_of_nonzeros, // number of edges - row_offsets, // XXX: illegal device memory - column_indices, // XXX: illegal device memory - edge_values); // XXX: illegal device memory - - return color(G.data().get(), g.data(), colors.data().get()); -} - -} // namespace color -} // namespace gunrock \ No newline at end of file diff --git a/gunrock/applications/color/color_implementation.hxx b/gunrock/applications/color/color_implementation.hxx deleted file mode 100644 index ba239d65..00000000 --- a/gunrock/applications/color/color_implementation.hxx +++ /dev/null @@ -1,169 +0,0 @@ -/** - * @file color_implementation.hxx - * @author Muhammad Osama (mosama@ucdavis.edu) - * @brief Single-Source Shortest Path graph algorithm. This is where - * we actually implement color using operators. - * @version 0.1 - * @date 2020-10-05 - * - * @copyright Copyright (c) 2020 - * - */ -#pragma once - -#include -#include - -#include - -#include - -namespace gunrock { -namespace color { - -template -struct color_problem_t : problem_t { - // Get useful types from graph_type - using vertex_t = typename graph_type::vertex_type; - using weight_t = typename graph_type::weight_type; - - using weight_pointer_t = typename graph_type::weight_pointer_t; - using vertex_pointer_t = typename graph_type::vertex_pointer_t; - - // Useful types from problem_t - using problem_type = problem_t; - - thrust::device_vector randoms; - vertex_pointer_t colors; - - /** - * @brief Construct a new color problem t object - * - * @param G graph on GPU - * @param g graph on CPU - * @param context system context - * @param _colors output color per vertex array - */ - color_problem_t(graph_type* G, - host_graph_type* g, - std::shared_ptr context, - vertex_pointer_t _colors) - : problem_type(G, g, context), - colors(_colors), - randoms(g->get_number_of_vertices()) { - // XXX: Ugly. Initialize d_colors to be all INVALIDs. - auto d_colors = thrust::device_pointer_cast(colors); - thrust::fill(thrust::device, d_colors + 0, - d_colors + g->get_number_of_vertices(), - std::numeric_limits::max()); - - // Generate random numbers. - algo::generate::random::uniform_distribution(0, g->get_number_of_vertices(), - randoms.begin()); - } - - color_problem_t(const color_problem_t& rhs) = delete; - color_problem_t& operator=(const color_problem_t& rhs) = delete; -}; - -template -struct color_enactor_t : enactor_t { - using enactor_type = enactor_t; - - using vertex_t = typename algorithm_problem_t::vertex_t; - using edge_t = typename algorithm_problem_t::edge_t; - using weight_t = typename algorithm_problem_t::weight_t; - - /** - * @brief ... XXX - * - * @param context - */ - void loop(cuda::standard_context_t* context) override { - // Data slice - auto E = enactor_type::get_enactor(); - auto P = E->get_problem_pointer(); - auto G = P->get_graph_pointer(); - - auto colors = P->colors; - auto rand = P->randoms.data().get(); - auto iteration = E->iteration; - - /** - * @brief ... XXX - * - */ - auto color_me_in = [G, colors, rand, iteration] __host__ __device__( - vertex_t const& vertex) -> bool { - // If invalid vertex, exit early. - if (vertex == std::numeric_limits::max()) - return false; - - edge_t start_edge = G->get_starting_edge(vertex); - edge_t num_neighbors = G->get_number_of_neighbors(vertex); - - bool colormax = true; - bool colormin = true; - - // Color two nodes at the same time. - int color = iteration * 2; - - // Main loop that goes over all the neighbors and finds the maximum or - // minimum random number vertex. - for (edge_t e = start_edge; e < start_edge + num_neighbors; ++e) { - vertex_t u = G->get_destination_vertex(e); - - if ((colors[u] != std::numeric_limits::max()) && - (colors[u] != color + 1) && (colors[u] != color + 2) || - (vertex == u)) - continue; - if (rand[vertex] <= rand[u]) - colormax = false; - if (rand[vertex] >= rand[u]) - colormin = false; - } - - // Color if the node has the maximum OR minimum random number, this way, - // per iteration we can possibly fill 2 colors at the same time. - if (colormax) { - colors[vertex] = color + 1; - return false; // remove (colored). - } else if (colormin) { - colors[vertex] = color + 2; - return false; // remove (colored). - } else { - return true; // keep (not colored). - } - }; - - // Execute filter operator on the provided lambda. - operators::filter::execute( - G, E, color_me_in, context); - } - - /** - * @brief Populate the initial frontier with a the entire graph (nodes). - * - * @param context - */ - void prepare_frontier(cuda::standard_context_t* context) override { - auto E = enactor_type::get_enactor(); // Enactor pointer - auto P = E->get_problem_pointer(); // Problem pointer - auto g = P->get_host_graph_pointer(); // HOST graph pointer - auto f = E->get_active_frontier_buffer(); // active frontier - - // XXX: Find a better way to initialize the frontier to all nodes - for (vertex_t v = 0; v < g->get_number_of_vertices(); ++v) - f->push_back(v); - } - - color_enactor_t(algorithm_problem_t* problem, - std::shared_ptr context) - : enactor_type(problem, context) {} - - color_enactor_t(const color_enactor_t& rhs) = delete; - color_enactor_t& operator=(const color_enactor_t& rhs) = delete; -}; // struct color_enactor_t - -} // namespace color -} // namespace gunrock \ No newline at end of file diff --git a/gunrock/applications/sssp.hxx b/gunrock/applications/sssp.hxx new file mode 100644 index 00000000..707b8481 --- /dev/null +++ b/gunrock/applications/sssp.hxx @@ -0,0 +1,200 @@ +/** + * @file sssp_implementation.hxx + * @author Muhammad Osama (mosama@ucdavis.edu) + * @brief Single-Source Shortest Path graph algorithm. This is where + * we actually implement SSSP using operators. + * @version 0.1 + * @date 2020-10-05 + * + * @copyright Copyright (c) 2020 + * + */ +#pragma once + +#include + +#include + +namespace gunrock { +namespace sssp { + +template +struct param_t { + using vertex_t = typename meta_t::vertex_type; + + vertex_t single_source; + + param_t( + vertex_t _single_source + ) : + single_source(_single_source) {} +}; + +template +struct result_t { + using vertex_t = typename meta_t::vertex_type; + using weight_t = typename meta_t::weight_type; + + weight_t* distances; + vertex_t* predecessors; + + result_t( + weight_t* _distances, + vertex_t* _predecessors + ) : + distances(_distances), + predecessors(_predecessors) {} +}; + +template +struct problem_t : gunrock::problem_t { + // Use Base class constructor -- does this work? does it handle copy constructor? + using gunrock::problem_t::problem_t; + + using vertex_t = typename meta_t::vertex_type; + using edge_t = typename meta_t::edge_type; + using weight_t = typename meta_t::weight_type; + + thrust::device_vector visited; + + void init() { + auto n_vertices = this->get_meta_pointer()->get_number_of_vertices(); + visited.resize(n_vertices); + thrust::fill(thrust::device, visited.begin(), visited.end(), -1); + } + + void reset() { + auto n_vertices = this->get_meta_pointer()->get_number_of_vertices(); + + auto d_distances = thrust::device_pointer_cast(this->result->distances); + thrust::fill( + thrust::device, + d_distances + 0, + d_distances + n_vertices, + std::numeric_limits::max() + ); + + thrust::fill( + thrust::device, + d_distances + this->param->single_source, + d_distances + this->param->single_source + 1, + 0 + ); + + thrust::fill(thrust::device, visited.begin(), visited.end(), -1); // This does need to be reset in between runs though + } +}; + +template +struct enactor_t : gunrock::enactor_t { + // Use Base class constructor -- does this work? does it handle copy constructor? + using gunrock::enactor_t::enactor_t; + + using vertex_t = typename problem_t::vertex_t; + using edge_t = typename problem_t::edge_t; + using weight_t = typename problem_t::weight_t; + + void prepare_frontier(cuda::standard_context_t* context) override { + auto P = this->get_problem_pointer(); + auto f = this->get_active_frontier_buffer(); + f->push_back(P->param->single_source); + } + + void loop(cuda::standard_context_t* context) override { + // Data slice + auto E = this->get_enactor(); + auto P = this->get_problem_pointer(); + auto G = P->get_graph_pointer(); + + + auto single_source = P->param->single_source; + auto distances = P->result->distances; + auto visited = P->visited.data().get(); + + auto iteration = this->iteration; + + auto shortest_path = [distances, single_source] __host__ __device__( + vertex_t const& source, // ... source + vertex_t const& neighbor, // neighbor + edge_t const& edge, // edge + weight_t const& weight // weight (tuple). + ) -> bool { + weight_t source_distance = distances[source]; // use cached::load + weight_t distance_to_neighbor = source_distance + weight; + + // Check if the destination node has been claimed as someone's child + weight_t recover_distance = math::atomic::min(&(distances[neighbor]), distance_to_neighbor); + + return (distance_to_neighbor < recover_distance); + }; + + auto remove_completed_paths = [visited, iteration] __host__ __device__(vertex_t const& vertex) -> bool { + if (vertex == std::numeric_limits::max()) + return false; + + if (visited[vertex] == iteration) + return false; + + visited[vertex] = iteration; + return true; + }; + + // Execute advance operator on the provided lambda + operators::advance::execute( + G, E, shortest_path, context); + + // Execute filter operator on the provided lambda + operators::filter::execute( + G, E, remove_completed_paths, context); + } + +}; // struct enactor_t + + +// !! Helper -- This should go somewhere else -- @neoblizz, where? +auto get_default_context() { + std::vector devices; + devices.push_back(0); + + return std::shared_ptr( + new cuda::multi_context_t(devices)); +} + +template +float run( + std::shared_ptr& G, + std::shared_ptr& meta, + typename meta_t::vertex_type& single_source, // Parameter + typename meta_t::weight_type* distances, // Output + typename meta_t::vertex_type* predecessors // Output +) { + + // + param_t param(single_source); + result_t result(distances, predecessors); + // + // + auto multi_context = get_default_context(); + + using problem_type = problem_t, result_t>; + using enactor_type = enactor_t; + + problem_type problem( + G.get(), + meta.get(), + ¶m, + &result, + multi_context + ); + problem.init(); + problem.reset(); + + enactor_type enactor(&problem, multi_context); + return enactor.enact(); + // +} + +} // namespace sssp +} // namespace gunrock \ No newline at end of file diff --git a/gunrock/applications/sssp/sssp.hxx b/gunrock/applications/sssp/sssp.hxx deleted file mode 100644 index 412546b4..00000000 --- a/gunrock/applications/sssp/sssp.hxx +++ /dev/null @@ -1,78 +0,0 @@ -/** - * @file sssp.hxx - * @author Muhammad Osama (mosama@ucdavis.edu) - * @brief Single-Source Shortest Path graph algorithm. - * @version 0.1 - * @date 2020-10-05 - * - * @copyright Copyright (c) 2020 - * - */ - -#include - -#pragma once - -namespace gunrock { -namespace sssp { - -using namespace memory; - -template -float sssp(std::shared_ptr& G, - std::shared_ptr& g, - typename graph_type::vertex_type source, - typename graph_type::weight_pointer_t distances) { - using sssp_problem_type = sssp_problem_t; - using sssp_enactor_type = sssp_enactor_t; - using weight_t = typename graph_type::weight_type; - - // Create contexts for all the devices - std::vector devices; - devices.push_back(0); - - auto multi_context = std::shared_ptr( - new cuda::multi_context_t(devices)); - - std::shared_ptr sssp_problem( - std::make_shared(G.get(), // input graph (GPU) - g.get(), // input graph (CPU) - multi_context, // input context - source, // input source - distances, // output distances - nullptr // output predecessors - )); - - std::shared_ptr sssp_enactor( - std::make_shared( - sssp_problem.get(), // pass in a problem (contains data in/out) - multi_context)); - - float elapsed = sssp_enactor->enact(); - return elapsed; -} - -template -float execute(csr_device_t& csr, - vertex_t const& source, - weight_vector_t& distances) { - // Build graph structure for SSSP - auto G = graph::build::from_csr_t( - csr.number_of_rows, // number of rows - csr.number_of_columns, // number of columns - csr.number_of_nonzeros, // number of edges - csr.row_offsets, // row offsets - csr.column_indices, // column indices - csr.nonzero_values); // nonzero values - - // XXX: Rework, there should be a way to hide this: - auto g = graph::build::meta_graph(csr.number_of_rows, // number of rows - csr.number_of_columns, // number of columns - csr.number_of_nonzeros // number of edges - ); - - return sssp(G, g, source, distances.data().get()); -} - -} // namespace sssp -} // namespace gunrock \ No newline at end of file diff --git a/gunrock/applications/sssp/sssp_implementation.hxx b/gunrock/applications/sssp/sssp_implementation.hxx deleted file mode 100644 index a69da044..00000000 --- a/gunrock/applications/sssp/sssp_implementation.hxx +++ /dev/null @@ -1,172 +0,0 @@ -/** - * @file sssp_implementation.hxx - * @author Muhammad Osama (mosama@ucdavis.edu) - * @brief Single-Source Shortest Path graph algorithm. This is where - * we actually implement SSSP using operators. - * @version 0.1 - * @date 2020-10-05 - * - * @copyright Copyright (c) 2020 - * - */ -#pragma once - -#include -#include - -namespace gunrock { -namespace sssp { - -template -struct sssp_problem_t : problem_t { - // Get useful types from graph_type - using vertex_t = typename graph_type::vertex_type; - using weight_t = typename graph_type::weight_type; - - using weight_pointer_t = typename graph_type::weight_pointer_t; - using vertex_pointer_t = typename graph_type::vertex_pointer_t; - - // Useful types from problem_t - using problem_type = problem_t; - - vertex_t single_source; - weight_pointer_t distances; - vertex_pointer_t predecessors; - thrust::device_vector visited; - - /** - * @brief Construct a new sssp problem t object - * - * @param G graph on GPU - * @param g graph on CPU - * @param context system context - * @param source input single source for sssp - * @param dist output distance pointer - * @param preds output predecessors pointer - */ - sssp_problem_t(graph_type* G, - host_graph_type* g, - std::shared_ptr context, - vertex_t& source, - weight_pointer_t dist, - vertex_pointer_t preds) - : problem_type(G, g, context), - single_source(source), - distances(dist), - predecessors(preds), - visited(g[0].get_number_of_vertices(), -1) { - // Set all initial distances to INFINITY - auto d_dist = thrust::device_pointer_cast(distances); - thrust::fill(thrust::device, d_dist + 0, - d_dist + g[0].get_number_of_vertices(), - std::numeric_limits::max()); - thrust::fill(thrust::device, d_dist + source, d_dist + source + 1, 0); - } - - sssp_problem_t(const sssp_problem_t& rhs) = delete; - sssp_problem_t& operator=(const sssp_problem_t& rhs) = delete; -}; - -template -struct sssp_enactor_t : enactor_t { - using enactor_type = enactor_t; - - using vertex_t = typename algorithm_problem_t::vertex_t; - using edge_t = typename algorithm_problem_t::edge_t; - using weight_t = typename algorithm_problem_t::weight_t; - - /** - * @brief Populate the initial frontier with a single source node from where - * we begin shortest path traversal. - * - * @param context - */ - void prepare_frontier(cuda::standard_context_t* context) override { - auto P = enactor_type::get_problem_pointer(); - auto single_source = P->single_source; - - auto f = enactor_type::get_active_frontier_buffer(); - f->push_back(single_source); - } - - /** - * @brief This is the core of the implementation for SSSP algorithm. loops - * till the convergence condition is met (see: is_converged()). Note that this - * function is on the host and is timed, so make sure you are writing the most - * efficient implementation possible. Avoid performing copies in this function - * or running API calls that are incredibly slow (such as printfs), unless - * they are part of your algorithms' implementation. - * - * @param context - */ - void loop(cuda::standard_context_t* context) override { - // Data slice - auto P = enactor_type::get_problem_pointer(); - auto G = P->get_graph_pointer(); - auto distances = P->distances; - auto single_source = P->single_source; - auto visited = P->visited.data().get(); - auto iteration = enactor_type::iteration; - - /** - * @brief Lambda operator to advance to neighboring vertices from the - * source vertices in the frontier, and marking the vertex to stay in the - * frontier if and only if it finds a new shortest distance, otherwise, - * it's shortest distance is found and we mark to remove the vertex from - * the frontier. - * - */ - auto shortest_path = [distances, single_source] __host__ __device__( - vertex_t const& source, // ... source - vertex_t const& neighbor, // neighbor - edge_t const& edge, // edge - weight_t const& weight // weight (tuple). - ) -> bool { - weight_t source_distance = distances[source]; // use cached::load - weight_t distance_to_neighbor = source_distance + weight; - - // Check if the destination node has been claimed as someone's child - weight_t recover_distance = - math::atomic::min(&(distances[neighbor]), distance_to_neighbor); - - if (distance_to_neighbor < recover_distance) - return true; // mark to keep - return false; // mark for removal - }; - - /** - * @brief Lambda operator to determine which vertices to filter and which - * to keep. - * - */ - auto remove_completed_paths = [visited, iteration] __host__ __device__( - vertex_t const& vertex) -> bool { - if (vertex == std::numeric_limits::max()) - return false; - if (visited[vertex] == iteration) - return false; - visited[vertex] = iteration; - return true; - }; - - // Execute advance operator on the provided lambda - operators::advance::execute( - G, enactor_type::get_enactor(), shortest_path, context); - - // Execute filter operator on the provided lambda - operators::filter::execute( - G, enactor_type::get_enactor(), remove_completed_paths, context); - } - - sssp_enactor_t(algorithm_problem_t* _problem, - std::shared_ptr _context) - : enactor_type(_problem, _context) {} - - sssp_enactor_t(const sssp_enactor_t& rhs) = delete; - sssp_enactor_t& operator=(const sssp_enactor_t& rhs) = delete; -}; // struct sssp_enactor_t - -} // namespace sssp -} // namespace gunrock \ No newline at end of file diff --git a/gunrock/formats/csr.hxx b/gunrock/formats/csr.hxx index 4aa5aea0..3768eff1 100644 --- a/gunrock/formats/csr.hxx +++ b/gunrock/formats/csr.hxx @@ -59,7 +59,7 @@ struct csr_t { * @param coo * @return csr_t& */ - csr_t& operator=( + csr_tfrom_coo( const coo_t& coo) { number_of_rows = coo.number_of_rows; number_of_columns = coo.number_of_columns; diff --git a/gunrock/framework/enactor.hxx b/gunrock/framework/enactor.hxx index 394f1641..88b22ffc 100644 --- a/gunrock/framework/enactor.hxx +++ b/gunrock/framework/enactor.hxx @@ -65,9 +65,9 @@ struct enactor_t { buffer_selector(0), iteration(0), scanned_work_domain( - problem->get_host_graph_pointer()->get_number_of_vertices()) { + problem->get_meta_pointer()->get_number_of_vertices()) { // Set temporary buffer to be at least the number of edges - auto g = problem->get_host_graph_pointer(); + auto g = problem->get_meta_pointer(); auto buffer = get_inactive_frontier_buffer(); buffer->reserve(g->get_number_of_edges()); } diff --git a/gunrock/framework/problem.hxx b/gunrock/framework/problem.hxx index 5ff20d0f..c1bb45b2 100644 --- a/gunrock/framework/problem.hxx +++ b/gunrock/framework/problem.hxx @@ -20,31 +20,38 @@ namespace gunrock { * replicated or partitioned to multiple instances (for example, in a multi-gpu * context). In the algorithms' problem constructor, initialize your data. * - * @tparam graph_type - * @tparam host_graph_type + * @tparam graph_t + * @tparam meta_t */ -template +template struct problem_t { - using vertex_t = typename graph_type::vertex_type; - using edge_t = typename graph_type::edge_type; - using weight_t = typename graph_type::weight_type; + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + param_t* param; + result_t* result; - using vertex_pointer_t = typename graph_type::vertex_pointer_t; - using edge_pointer_t = typename graph_type::edge_pointer_t; - using weight_pointer_t = typename graph_type::weight_pointer_t; - - graph_type* graph_slice; - host_graph_type* host_graph_slice; + graph_t* graph_slice; + meta_t* meta_slice; std::shared_ptr context; - graph_type* get_graph_pointer() const { return graph_slice; } - host_graph_type* get_host_graph_pointer() const { return host_graph_slice; } - + graph_t* get_graph_pointer() const { return graph_slice; } + meta_t* get_meta_pointer() const { return meta_slice; } + problem_t() : graph_slice(nullptr) {} - problem_t(graph_type* G, - host_graph_type* g, - std::shared_ptr _context) - : graph_slice(G), host_graph_slice(g), context(_context) {} + problem_t( + graph_t* G, + meta_t* meta, + param_t* _param, + result_t* _result, + std::shared_ptr _context + ) : + graph_slice(G), + meta_slice(meta), + param(_param), + result(_result), + context(_context) { } // Disable copy ctor and assignment operator. // We do not want to let user copy only a slice. diff --git a/gunrock/graph/build.hxx b/gunrock/graph/build.hxx index 9a3cc40c..c92d614b 100644 --- a/gunrock/graph/build.hxx +++ b/gunrock/graph/build.hxx @@ -11,6 +11,8 @@ #pragma once +#include + namespace gunrock { namespace graph { namespace build { @@ -82,68 +84,83 @@ void csr_t(graph_type I, graph_type* G) { } // namespace host template -auto from_csr_t(typename vertex_vector_t::value_type const& r, - typename vertex_vector_t::value_type const& c, - typename edge_vector_t::value_type const& nnz, - edge_vector_t& Ap, - vertex_vector_t& Aj, - weight_vector_t& Ax) { - using vertex_type = typename vertex_vector_t::value_type; - using edge_type = typename edge_vector_t::value_type; - using weight_type = typename weight_vector_t::value_type; - - auto Ap_ptr = memory::raw_pointer_cast(Ap.data()); - auto Aj_ptr = memory::raw_pointer_cast(Aj.data()); - auto Ax_ptr = memory::raw_pointer_cast(Ax.data()); - + typename edge_type, + typename vertex_type, + typename weight_type> +auto _from_csr_t(vertex_type const& r, + vertex_type const& c, + edge_type const& nnz, + edge_type* Ap_ptr, + vertex_type* Aj_ptr, + weight_type* Ax_ptr) { + using graph_type = graph::graph_t< space, vertex_type, edge_type, weight_type, graph::graph_csr_t>; - auto deleter = [&](graph_type* ptr) { memory::free(ptr, space); }; - std::shared_ptr O( - memory::allocate(sizeof(graph_type), space), deleter); - graph_type G; - G.set(r, c, nnz, Ap_ptr, Aj_ptr, Ax_ptr); + auto graph_deleter = [&](graph_type* ptr) { memory::free(ptr, space); }; + std::shared_ptr G_ptr( + memory::allocate(sizeof(graph_type), space), graph_deleter); + if (space == memory_space_t::device) { - device::csr_t(G, O.get()); - // memory::raw_pointer_cast(O.data())); + device::csr_t(G, G_ptr.get()); } else { - host::csr_t(G, O.get()); - // memory::raw_pointer_cast(O.data())); + host::csr_t(G, G_ptr.get()); } - - return O; + + return G_ptr; +} + +template +auto from_csr_t(vertex_type const& r, + vertex_type const& c, + edge_type const& nnz, + edge_type* Ap_ptr, + vertex_type* Aj_ptr, + weight_type* Ax_ptr) { + + // From raw pointers + auto G_ptr = _from_csr_t(r, c, nnz, Ap_ptr, Aj_ptr, Ax_ptr); + auto meta_ptr = _from_csr_t(r, c, nnz, nullptr, nullptr, nullptr); + return std::make_pair(G_ptr, meta_ptr); } -template -auto meta_graph(vertex_t const& r, vertex_t const& c, edge_t const& nnz) { - using vertex_type = vertex_t; - using edge_type = edge_t; - using weight_type = edge_t; - - constexpr memory_space_t space = memory_space_t::host; - - using graph_type = graph::graph_t< - space, vertex_type, edge_type, weight_type, - graph::graph_csr_t>; - - auto deleter = [&](graph_type* ptr) { memory::free(ptr, space); }; - std::shared_ptr O( - memory::allocate(sizeof(graph_type), space), deleter); - - graph_type G; - - G.set(r, c, nnz, nullptr, nullptr, nullptr); - host::csr_t(G, O.get()); +template +auto from_csr_t(typename vertex_vector_t::value_type const& r, + typename vertex_vector_t::value_type const& c, + typename edge_vector_t::value_type const& nnz, + edge_vector_t& Ap, + vertex_vector_t& Aj, + weight_vector_t& Ax) { + // From thrust vectors + return from_csr_t( + r, c, nnz, + memory::raw_pointer_cast(Ap.data()), + memory::raw_pointer_cast(Aj.data()), + memory::raw_pointer_cast(Ax.data()) + ); +} - return O; +template +auto from_csr_t(csr_t* csr) { + // From a CSR object + return from_csr_t( + csr->number_of_rows, + csr->number_of_columns, + csr->number_of_nonzeros, + csr->row_offsets, + csr->column_indices, + csr->nonzero_values + ); } } // namespace build