Merge pull request #73 from bkj/dev/ppr
[APP] Parallel PR-Nibble
Showing 6 changed files with 458 additions and 25 deletions.

@@ -0,0 +1,21 @@
# begin /* Set the application name. */
set(APPLICATION_NAME ppr)
# end /* Set the application name. */

# begin /* Add CUDA executables */
add_executable(${APPLICATION_NAME})

set(SOURCE_LIST
  ${APPLICATION_NAME}.cu
)

target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)
set_target_properties(${APPLICATION_NAME}
  PROPERTIES
    CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
) # XXX: Find a better way to inherit essentials properties.

message("-- Example Added: ${APPLICATION_NAME}")
# end /* Add CUDA executables */

@@ -0,0 +1,93 @@
#include <gunrock/applications/ppr.hxx>
#include "ppr_cpu.hxx"

using namespace gunrock;
using namespace memory;

void test_ppr(int num_arguments, char** argument_array) {
  if (num_arguments != 2) {
    std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
    exit(1);
  }

  // --
  // Define types

  using vertex_t = int;
  using edge_t = int;
  using weight_t = float;

  using csr_t = format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
  csr_t csr;

  // --
  // IO

  weight_t alpha = 0.15;
  weight_t epsilon = 1e-6;
  vertex_t n_seeds = 50;

  std::string filename = argument_array[1];

  if (util::is_market(filename)) {
    io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
    csr.from_coo(mm.load(filename));
  } else if (util::is_binary_csr(filename)) {
    csr.read_binary(filename);
  } else {
    std::cerr << "Unknown file format: " << filename << std::endl;
    exit(1);
  }

  // --
  // Build graph

  auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
      csr.number_of_rows,               // rows
      csr.number_of_columns,            // columns
      csr.number_of_nonzeros,           // nonzeros
      csr.row_offsets.data().get(),     // row_offsets
      csr.column_indices.data().get(),  // column_indices
      csr.nonzero_values.data().get()   // values
  );  // supports row_indices and column_offsets (default = nullptr)

  // --
  // Params and memory allocation

  vertex_t n_vertices = G.get_number_of_vertices();

  thrust::device_vector<weight_t> p(n_seeds * n_vertices);

  // --
  // GPU Run

  float gpu_elapsed = gunrock::ppr::run_batch(
      G, n_seeds, p.data().get(), alpha, epsilon);

  // --
  // CPU Run

  thrust::host_vector<weight_t> h_p(n_seeds * n_vertices);

  float cpu_elapsed = ppr_cpu::run<csr_t, vertex_t, edge_t, weight_t>(
      csr, n_seeds, h_p.data(), alpha, epsilon);

  int n_errors = ppr_cpu::compute_error(p, h_p);

  // --
  // Log + Validate

  std::cout << "GPU PPR values[:40] = ";
  gunrock::print::head<weight_t>(p, 40);

  std::cout << "CPU PPR values[:40] = ";
  gunrock::print::head<weight_t>(h_p, 40);

  std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
  std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
  std::cout << "Number of errors : " << n_errors << std::endl;
}

int main(int argc, char** argv) {
  test_ppr(argc, argv);
}
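
Note on using the batch output: run_batch writes one personalized-PageRank vector per seed into a single flat buffer of n_seeds * n_vertices values, with seed s occupying the range [s * n_vertices, (s + 1) * n_vertices) (the same layout ppr_cpu::run assumes via all_p + seed * n_nodes). The sketch below is illustrative and not part of this PR; the helper name top_k_for_seed and the partial-sort approach are assumptions. It copies one seed's row back to the host and prints its k highest-scoring vertices.

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Hypothetical helper: given the flat (n_seeds x n_vertices) PPR buffer,
// copy one seed's row to the host and print its k highest-scoring vertices.
template <typename weight_t>
void top_k_for_seed(const thrust::device_vector<weight_t>& p,
                    int n_vertices,
                    int seed,
                    int k) {
  // Row `seed` starts at offset seed * n_vertices (same layout as ppr_cpu::run).
  thrust::host_vector<weight_t> row(p.begin() + seed * n_vertices,
                                    p.begin() + (seed + 1) * n_vertices);

  k = std::min(k, n_vertices);
  std::vector<int> idx(n_vertices);
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&](int a, int b) { return row[a] > row[b]; });

  for (int i = 0; i < k; i++)
    std::printf("seed %d: vertex %d -> %f\n", seed, idx[i], (double)row[i]);
}

For example, top_k_for_seed(p, n_vertices, 0, 10) placed after the run_batch call would list the ten vertices with the highest PPR score for seed 0.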

@@ -0,0 +1,117 @@
#pragma once

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <vector>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

namespace ppr_cpu {

using namespace std;
using namespace std::chrono;

template <typename csr_t, typename vertex_t, typename edge_t, typename weight_t>
float run(csr_t& csr,
          vertex_t& n_seeds,
          weight_t* all_p,
          weight_t& alpha,
          weight_t& epsilon) {

  thrust::host_vector<edge_t> _rowptr(csr.row_offsets);  // Copy data to CPU
  thrust::host_vector<vertex_t> _columns(csr.column_indices);
  thrust::host_vector<weight_t> _csr_data(csr.nonzero_values);

  edge_t* rowptr = _rowptr.data();
  vertex_t* columns = _columns.data();
  weight_t* csr_data = _csr_data.data();  // (edge weights are not used by PPR)

  vertex_t n_nodes = csr.number_of_rows;

  auto t_start = high_resolution_clock::now();

  weight_t* r = (weight_t*)malloc(n_nodes * sizeof(weight_t));
  weight_t* r_prime = (weight_t*)malloc(n_nodes * sizeof(weight_t));

  vertex_t* f = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));
  vertex_t* f_prime = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));

  vertex_t* degrees = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));

  // One PPR problem per seed; seeds are vertices 0 .. n_seeds - 1.
  for(vertex_t seed = 0; seed < n_seeds; seed++) {

    weight_t* p = all_p + (seed * n_nodes);

    for(vertex_t i = 0; i < n_nodes; i++) {
      r[i] = 0;
      r_prime[i] = 0;
      f[i] = 0;
      f_prime[i] = 0;
      degrees[i] = rowptr[i + 1] - rowptr[i];
    }

    r[seed] = 1;
    r_prime[seed] = 1;
    f[0] = seed;

    vertex_t f_size = 1;
    vertex_t f_prime_size = 0;

    while(f_size > 0) {
      // Absorb: each frontier vertex keeps 2*alpha/(1+alpha) of its residual as score.
      for(vertex_t i = 0; i < f_size; i++) {
        vertex_t node_idx = f[i];
        p[node_idx] += (2 * alpha) / (1 + alpha) * r[node_idx];
        r_prime[node_idx] = 0;
      }

      // Push: spread the remaining residual evenly to neighbors.
      for(vertex_t i = 0; i < f_size; i++) {
        vertex_t src_idx = f[i];
        vertex_t deg = degrees[src_idx];
        vertex_t offset = rowptr[src_idx];
        weight_t inv_r_deg = r[src_idx] / deg;

        for(vertex_t j = 0; j < deg; j++) {
          vertex_t dst_idx = columns[offset + j];
          weight_t update = ((1 - alpha) / (1 + alpha)) * inv_r_deg;

          weight_t oldval = r_prime[dst_idx];
          weight_t newval = r_prime[dst_idx] + update;
          weight_t thresh = degrees[dst_idx] * epsilon;

          r_prime[dst_idx] = newval;

          // A vertex joins the next frontier when its residual crosses deg * epsilon.
          if((oldval < thresh) && (newval >= thresh)) {
            f_prime[f_prime_size] = dst_idx;
            f_prime_size++;
          }
        }
      }

      memcpy(r, r_prime, n_nodes * sizeof(weight_t));

      // Swap frontiers for the next iteration.
      vertex_t* tmp_ptr = f;
      f = f_prime;
      f_size = f_prime_size;
      f_prime = tmp_ptr;
      f_prime_size = 0;
    }
  }

  auto t_stop = high_resolution_clock::now();
  auto elapsed = duration_cast<microseconds>(t_stop - t_start).count();

  free(r);
  free(r_prime);
  free(f);
  free(f_prime);
  free(degrees);

  return (float)elapsed / 1000;
}

template <typename val_t>
int compute_error(thrust::device_vector<val_t> _gpu_result,
                  thrust::host_vector<val_t> cpu_result) {
  thrust::host_vector<val_t> gpu_result(_gpu_result);

  int n_errors = 0;
  for (int i = 0; i < cpu_result.size(); i++) {
    if (abs(gpu_result[i] - cpu_result[i]) > 1e-6) {
      n_errors++;
    }
  }
  return n_errors;
}

} // namespace ppr_cpu
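
The reference above is the sequential PR-Nibble push procedure: a frontier vertex u keeps 2*alpha/(1+alpha) of its residual r[u] as score, spreads (1-alpha)/(1+alpha) * r[u]/deg(u) to each neighbor, and a neighbor joins the next frontier once its residual reaches deg * epsilon. Below is a minimal, self-contained C++ walk-through of that rule on a made-up 3-vertex path graph (0-1-2); the CSR arrays, the single seed, and the plain std::vector buffers are assumptions for illustration only, independent of gunrock's types and of the GPU operators in this PR.

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Hypothetical undirected path graph 0 - 1 - 2 in CSR form.
  std::vector<int> rowptr  = {0, 1, 3, 4};
  std::vector<int> columns = {1, 0, 2, 1};
  const float alpha = 0.15f, epsilon = 1e-6f;
  const int n = 3, seed = 0;

  std::vector<float> p(n, 0.0f), r(n, 0.0f), r_prime(n, 0.0f);
  std::vector<int> frontier = {seed};
  r[seed] = r_prime[seed] = 1.0f;

  while (!frontier.empty()) {
    // Absorb: each frontier vertex keeps 2*alpha/(1+alpha) of its residual as score.
    for (int u : frontier) {
      p[u] += (2 * alpha) / (1 + alpha) * r[u];
      r_prime[u] = 0.0f;
    }
    // Push: the rest of the residual is split evenly among the neighbors.
    std::vector<int> next;
    for (int u : frontier) {
      int deg = rowptr[u + 1] - rowptr[u];
      float share = ((1 - alpha) / (1 + alpha)) * r[u] / deg;
      for (int e = rowptr[u]; e < rowptr[u + 1]; e++) {
        int v = columns[e];
        float oldval = r_prime[v];
        r_prime[v] += share;
        int deg_v = rowptr[v + 1] - rowptr[v];
        // v becomes active once its residual crosses deg(v) * epsilon.
        if (oldval < deg_v * epsilon && r_prime[v] >= deg_v * epsilon)
          next.push_back(v);
      }
    }
    r = r_prime;
    frontier = std::move(next);
  }

  for (int v = 0; v < n; v++)
    std::printf("p[%d] = %f\n", v, (double)p[v]);
}

Running this toy loop converges in a few dozen iterations because the pushed residual shrinks by a factor of (1-alpha)/(1+alpha) each round, which is the same termination argument that applies to the reference implementation above.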