Skip to content
This repository has been archived by the owner on Dec 22, 2022. It is now read-only.

Commit

Permalink
Merge pull request #73 from bkj/dev/ppr
Browse files Browse the repository at this point in the history
[APP] Parallel PR-Nibble
  • Loading branch information
neoblizz authored May 16, 2021
2 parents 0e14985 + f30b0d3 commit 8121b38
Show file tree
Hide file tree
Showing 6 changed files with 458 additions and 25 deletions.
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,6 @@ add_subdirectory(bfs)
add_subdirectory(color)
add_subdirectory(geo)
add_subdirectory(pr)
add_subdirectory(ppr)
add_subdirectory(bc)
# end /* Add examples' subdirectories */
21 changes: 21 additions & 0 deletions examples/ppr/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# begin /* Set the application name. */
# The executable target and its single source file (<name>.cu) are both
# derived from this name.
set(APPLICATION_NAME ppr)
# end /* Set the application name. */

# begin /* Add CUDA executables */
# The target is declared without sources; they are attached below through
# target_sources().
add_executable(${APPLICATION_NAME})

set(SOURCE_LIST
    ${APPLICATION_NAME}.cu
)

target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)

# Mirror the CUDA architectures configured on the `essentials` target.
get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)

# The value is quoted on purpose: set_target_properties() takes strict
# <property> <value> pairs, so an unquoted list such as "61;70" (or an empty
# value) would expand to the wrong number of arguments.
set_target_properties(${APPLICATION_NAME}
    PROPERTIES
        CUDA_ARCHITECTURES "${ESSENTIALS_ARCHITECTURES}"
) # XXX: Find a better way to inherit essentials properties.

message("-- Example Added: ${APPLICATION_NAME}")
# end /* Add CUDA executables */
93 changes: 93 additions & 0 deletions examples/ppr/ppr.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
#include <gunrock/applications/ppr.hxx>
#include "ppr_cpu.hxx"

using namespace gunrock;
using namespace memory;

/**
 * @brief Loads a graph, runs batched personalized PageRank on the GPU and on
 *        the CPU reference, and prints both results, timings and the number of
 *        mismatching entries.
 *
 * Exits the process with status 1 when the argument count is wrong or the
 * file format is not recognised.
 *
 * @param num_arguments  Number of command-line arguments (must be 2).
 * @param argument_array Command-line arguments; argument_array[1] is the path
 *                       of a Matrix Market or binary CSR file.
 */
void test_ppr(int num_arguments, char** argument_array) {
  if (num_arguments != 2) {
    std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
    exit(1);
  }

  // --
  // Define types

  using vertex_t = int;
  using edge_t = int;
  using weight_t = float;

  using csr_t =
      format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
  csr_t csr;

  // --
  // IO

  // These stay non-const lvalues: ppr_cpu::run takes them by non-const
  // reference.
  weight_t alpha = 0.15;
  weight_t epsilon = 1e-6;
  vertex_t n_seeds = 50;

  std::string filename = argument_array[1];

  if (util::is_market(filename)) {
    io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
    csr.from_coo(mm.load(filename));
  } else if (util::is_binary_csr(filename)) {
    csr.read_binary(filename);
  } else {
    std::cerr << "Unknown file format: " << filename << std::endl;
    exit(1);
  }

  // --
  // Build graph

  auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
      csr.number_of_rows,               // rows
      csr.number_of_columns,            // columns
      csr.number_of_nonzeros,           // nonzeros
      csr.row_offsets.data().get(),     // row_offsets
      csr.column_indices.data().get(),  // column_indices
      csr.nonzero_values.data().get()   // values
  );  // supports row_indices and column_offsets (default = nullptr)

  // --
  // Params and memory allocation

  vertex_t n_vertices = G.get_number_of_vertices();

  // The CPU reference uses each seed id in [0, n_seeds) as an index into
  // per-vertex arrays, so a graph with fewer vertices than the default seed
  // count would be indexed out of bounds. Never request more seeds than there
  // are vertices.
  if (n_seeds > n_vertices) {
    n_seeds = n_vertices;
  }

  // One row of n_vertices scores per seed. The product is formed in size_t so
  // that large graphs cannot overflow the (int) vertex type.
  const std::size_t n_results =
      static_cast<std::size_t>(n_seeds) * static_cast<std::size_t>(n_vertices);

  thrust::device_vector<weight_t> p(n_results);

  // --
  // GPU Run

  float gpu_elapsed =
      gunrock::ppr::run_batch(G, n_seeds, p.data().get(), alpha, epsilon);

  // --
  // CPU Run

  // host_vector value-initialises its elements, which matters because the CPU
  // reference accumulates into this buffer with `+=`.
  thrust::host_vector<weight_t> h_p(n_results);

  float cpu_elapsed = ppr_cpu::run<csr_t, vertex_t, edge_t, weight_t>(
      csr, n_seeds, h_p.data(), alpha, epsilon);

  int n_errors = ppr_cpu::compute_error(p, h_p);

  // --
  // Log + Validate

  // NOTE(review): the labels below say "distances" although the printed
  // values are PPR scores; the strings are kept verbatim so existing output
  // consumers are not affected.
  std::cout << "GPU distances[:40] = ";
  gunrock::print::head<weight_t>(p, 40);

  std::cout << "CPU Distances (output) = ";
  gunrock::print::head<weight_t>(h_p, 40);

  std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
  std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
  std::cout << "Number of errors : " << n_errors << std::endl;
}

/// Program entry point: hands the command line straight to test_ppr().
int main(int argc, char** argv) {
  test_ppr(argc, argv);
  return 0;
}
117 changes: 117 additions & 0 deletions examples/ppr/ppr_cpu.hxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
#pragma once

#include <chrono>
#include <vector>
#include <queue>

namespace ppr_cpu {

using namespace std;
using namespace std::chrono;

/**
 * @brief Sequential CPU reference for batched, push-style personalized
 *        PageRank, used to validate the GPU implementation.
 *
 * Seed `s` (for s in [0, n_seeds)) starts with residual 1 on vertex `s`.
 * While the frontier is non-empty, every frontier vertex moves
 * 2*alpha/(1+alpha) of its residual into its score and spreads
 * (1-alpha)/(1+alpha) of it evenly over its out-neighbours; a neighbour joins
 * the next frontier when its residual crosses degree * epsilon.
 *
 * @tparam csr_t    CSR container exposing row_offsets, column_indices and
 *                  number_of_rows.
 * @tparam vertex_t Vertex index type.
 * @tparam edge_t   Edge offset type.
 * @tparam weight_t Floating-point score type.
 *
 * @param csr     Graph in CSR form; its arrays are copied to the host first.
 * @param n_seeds Number of seeds requested. Seeds with an id >= the number of
 *                vertices are skipped, since they do not name a vertex.
 * @param all_p   Output, row-major [seed][vertex] with a row stride of
 *                number_of_rows. Scores are accumulated with `+=`, so the
 *                caller must pass a zero-initialised buffer.
 * @param alpha   Teleport parameter used in the update factors above.
 * @param epsilon Per-degree residual threshold for entering the frontier.
 * @return Elapsed time in milliseconds, covering scratch allocation and the
 *         algorithm but not the host copies of the CSR arrays.
 */
template <typename csr_t, typename vertex_t, typename edge_t, typename weight_t>
float run(csr_t& csr,
          vertex_t& n_seeds,
          weight_t* all_p,
          weight_t& alpha,
          weight_t& epsilon) {
  // Copy the graph structure to the CPU. Edge weights are never read by this
  // algorithm, so nonzero_values is not copied.
  thrust::host_vector<edge_t> _rowptr(csr.row_offsets);
  thrust::host_vector<vertex_t> _columns(csr.column_indices);

  const edge_t* rowptr = _rowptr.data();
  const vertex_t* columns = _columns.data();

  const vertex_t n_nodes = csr.number_of_rows;
  const std::size_t n = static_cast<std::size_t>(n_nodes);

  auto t_start = std::chrono::high_resolution_clock::now();

  // Scratch storage is owned by vectors so it is released on every exit path.
  std::vector<weight_t> r(n);        // residuals read during an iteration
  std::vector<weight_t> r_prime(n);  // residuals written during an iteration
  std::vector<vertex_t> f;           // current frontier
  std::vector<vertex_t> f_prime;     // next frontier
  f.reserve(n);
  f_prime.reserve(n);

  // Degrees depend only on the graph, so compute them once for all seeds.
  std::vector<vertex_t> degrees(n);
  for (std::size_t i = 0; i < n; i++) {
    degrees[i] = static_cast<vertex_t>(rowptr[i + 1] - rowptr[i]);
  }

  const weight_t p_scale = (2 * alpha) / (1 + alpha);
  const weight_t r_scale = (1 - alpha) / (1 + alpha);

  // A seed id is used as a vertex index, so ids past the last vertex are
  // skipped instead of writing out of bounds.
  const vertex_t n_valid_seeds = (n_seeds < n_nodes) ? n_seeds : n_nodes;

  for (vertex_t seed = 0; seed < n_valid_seeds; seed++) {
    // Row offset is computed in size_t to avoid overflowing vertex_t.
    weight_t* p = all_p + static_cast<std::size_t>(seed) * n;

    r.assign(n, 0);
    r_prime.assign(n, 0);
    r[seed] = 1;
    r_prime[seed] = 1;

    f.clear();
    f.push_back(seed);
    f_prime.clear();

    while (!f.empty()) {
      // Phase 1: settle part of each frontier residual into the score and
      // clear the vertex's pending residual.
      for (const vertex_t node_idx : f) {
        p[node_idx] += p_scale * r[node_idx];
        r_prime[node_idx] = 0;
      }

      // Phase 2: push the remaining residual to the out-neighbours.
      for (const vertex_t src_idx : f) {
        const vertex_t deg = degrees[src_idx];
        if (deg == 0) {
          continue;  // nothing to push to; also avoids dividing by zero
        }

        const edge_t first = rowptr[src_idx];
        const weight_t update = r_scale * (r[src_idx] / deg);

        for (vertex_t j = 0; j < deg; j++) {
          const vertex_t dst_idx = columns[first + j];

          const weight_t oldval = r_prime[dst_idx];
          const weight_t newval = oldval + update;
          const weight_t thresh = degrees[dst_idx] * epsilon;

          r_prime[dst_idx] = newval;

          // Enqueue only on the transition across the threshold so a vertex
          // enters the next frontier at most once per iteration.
          if ((oldval < thresh) && (newval >= thresh)) {
            f_prime.push_back(dst_idx);
          }
        }
      }

      // Publish this iteration's residuals and advance the frontier.
      r = r_prime;
      f.swap(f_prime);
      f_prime.clear();
    }
  }

  auto t_stop = std::chrono::high_resolution_clock::now();
  auto elapsed =
      std::chrono::duration_cast<std::chrono::microseconds>(t_stop - t_start)
          .count();
  return static_cast<float>(elapsed) / 1000;
}

template <typename val_t>
int compute_error(thrust::device_vector<val_t> _gpu_result,
thrust::host_vector<val_t> cpu_result) {
thrust::host_vector<val_t> gpu_result(_gpu_result);

int n_errors = 0;
for (int i = 0; i < cpu_result.size(); i++) {
if (abs(gpu_result[i] - cpu_result[i]) > 1e-6) {
n_errors++;
}
}
return n_errors;
}

} // namespace ppr_cpu
Loading

0 comments on commit 8121b38

Please sign in to comment.