Merge pull request #73 from bkj/dev/ppr
[APP] Parallel PR-Nibble
Showing 6 changed files with 458 additions and 25 deletions.

@@ -0,0 +1,21 @@
# begin /* Set the application name. */
set(APPLICATION_NAME ppr)
# end /* Set the application name. */

# begin /* Add CUDA executables */
add_executable(${APPLICATION_NAME})

set(SOURCE_LIST
  ${APPLICATION_NAME}.cu
)

target_sources(${APPLICATION_NAME} PRIVATE ${SOURCE_LIST})
target_link_libraries(${APPLICATION_NAME} PRIVATE essentials)
get_target_property(ESSENTIALS_ARCHITECTURES essentials CUDA_ARCHITECTURES)
set_target_properties(${APPLICATION_NAME}
  PROPERTIES
    CUDA_ARCHITECTURES ${ESSENTIALS_ARCHITECTURES}
) # XXX: Find a better way to inherit essentials properties.

message("-- Example Added: ${APPLICATION_NAME}")
# end /* Add CUDA executables */

@@ -0,0 +1,93 @@
#include <gunrock/applications/ppr.hxx>
#include "ppr_cpu.hxx"

using namespace gunrock;
using namespace memory;

void test_ppr(int num_arguments, char** argument_array) {
  if (num_arguments != 2) {
    std::cerr << "usage: ./bin/<program-name> filename.mtx" << std::endl;
    exit(1);
  }

  // --
  // Define types

  using vertex_t = int;
  using edge_t = int;
  using weight_t = float;

  using csr_t = format::csr_t<memory_space_t::device, vertex_t, edge_t, weight_t>;
  csr_t csr;

  // --
  // IO

  weight_t alpha = 0.15;
  weight_t epsilon = 1e-6;
  vertex_t n_seeds = 50;

  std::string filename = argument_array[1];

  if (util::is_market(filename)) {
    io::matrix_market_t<vertex_t, edge_t, weight_t> mm;
    csr.from_coo(mm.load(filename));
  } else if (util::is_binary_csr(filename)) {
    csr.read_binary(filename);
  } else {
    std::cerr << "Unknown file format: " << filename << std::endl;
    exit(1);
  }

  // --
  // Build graph

  auto G = graph::build::from_csr<memory_space_t::device, graph::view_t::csr>(
      csr.number_of_rows,               // rows
      csr.number_of_columns,            // columns
      csr.number_of_nonzeros,           // nonzeros
      csr.row_offsets.data().get(),     // row_offsets
      csr.column_indices.data().get(),  // column_indices
      csr.nonzero_values.data().get()   // values
  );  // supports row_indices and column_offsets (default = nullptr)

  // --
  // Params and memory allocation

  vertex_t n_vertices = G.get_number_of_vertices();

  thrust::device_vector<weight_t> p(n_seeds * n_vertices);

  // --
  // GPU Run

  float gpu_elapsed = gunrock::ppr::run_batch(
      G, n_seeds, p.data().get(), alpha, epsilon);

  // --
  // CPU Run

  thrust::host_vector<weight_t> h_p(n_seeds * n_vertices);

  float cpu_elapsed = ppr_cpu::run<csr_t, vertex_t, edge_t, weight_t>(
      csr, n_seeds, h_p.data(), alpha, epsilon);

  int n_errors = ppr_cpu::compute_error(p, h_p);

  // --
  // Log + Validate

  std::cout << "GPU PPR values[:40] = ";
  gunrock::print::head<weight_t>(p, 40);

  std::cout << "CPU PPR values[:40] = ";
  gunrock::print::head<weight_t>(h_p, 40);

  std::cout << "GPU Elapsed Time : " << gpu_elapsed << " (ms)" << std::endl;
  std::cout << "CPU Elapsed Time : " << cpu_elapsed << " (ms)" << std::endl;
  std::cout << "Number of errors : " << n_errors << std::endl;
}

int main(int argc, char** argv) {
  test_ppr(argc, argv);
}
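
Note on using the batch output: run_batch writes one personalized-PageRank vector per seed into a single flat buffer of n_seeds * n_vertices values, with seed s occupying the range [s * n_vertices, (s + 1) * n_vertices) (the same layout ppr_cpu::run assumes via all_p + seed * n_nodes). The sketch below is illustrative and not part of this PR; the helper name top_k_for_seed and the partial-sort approach are assumptions. It copies one seed's row back to the host and prints its k highest-scoring vertices.

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

#include <algorithm>
#include <cstdio>
#include <numeric>
#include <vector>

// Hypothetical helper: given the flat (n_seeds x n_vertices) PPR buffer,
// copy one seed's row to the host and print its k highest-scoring vertices.
template <typename weight_t>
void top_k_for_seed(const thrust::device_vector<weight_t>& p,
                    int n_vertices,
                    int seed,
                    int k) {
  // Row `seed` starts at offset seed * n_vertices (same layout as ppr_cpu::run).
  thrust::host_vector<weight_t> row(p.begin() + seed * n_vertices,
                                    p.begin() + (seed + 1) * n_vertices);

  k = std::min(k, n_vertices);
  std::vector<int> idx(n_vertices);
  std::iota(idx.begin(), idx.end(), 0);
  std::partial_sort(idx.begin(), idx.begin() + k, idx.end(),
                    [&](int a, int b) { return row[a] > row[b]; });

  for (int i = 0; i < k; i++)
    std::printf("seed %d: vertex %d -> %f\n", seed, idx[i], (double)row[i]);
}

For example, top_k_for_seed(p, n_vertices, 0, 10) placed after the run_batch call would list the ten vertices with the highest PPR score for seed 0.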

@@ -0,0 +1,117 @@
#pragma once

#include <chrono>
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <queue>
#include <vector>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

namespace ppr_cpu {

using namespace std;
using namespace std::chrono;

template <typename csr_t, typename vertex_t, typename edge_t, typename weight_t>
float run(csr_t& csr,
          vertex_t& n_seeds,
          weight_t* all_p,
          weight_t& alpha,
          weight_t& epsilon) {

  thrust::host_vector<edge_t> _rowptr(csr.row_offsets);  // Copy data to CPU
  thrust::host_vector<vertex_t> _columns(csr.column_indices);
  thrust::host_vector<weight_t> _csr_data(csr.nonzero_values);

  edge_t* rowptr = _rowptr.data();
  vertex_t* columns = _columns.data();
  weight_t* csr_data = _csr_data.data();  // (edge weights are not used by PPR)

  vertex_t n_nodes = csr.number_of_rows;

  auto t_start = high_resolution_clock::now();

  weight_t* r = (weight_t*)malloc(n_nodes * sizeof(weight_t));
  weight_t* r_prime = (weight_t*)malloc(n_nodes * sizeof(weight_t));

  vertex_t* f = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));
  vertex_t* f_prime = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));

  vertex_t* degrees = (vertex_t*)malloc(n_nodes * sizeof(vertex_t));

  // One PPR problem per seed; seeds are vertices 0 .. n_seeds - 1.
  for(vertex_t seed = 0; seed < n_seeds; seed++) {

    weight_t* p = all_p + (seed * n_nodes);

    for(vertex_t i = 0; i < n_nodes; i++) {
      r[i] = 0;
      r_prime[i] = 0;
      f[i] = 0;
      f_prime[i] = 0;
      degrees[i] = rowptr[i + 1] - rowptr[i];
    }

    r[seed] = 1;
    r_prime[seed] = 1;
    f[0] = seed;

    vertex_t f_size = 1;
    vertex_t f_prime_size = 0;

    while(f_size > 0) {
      // Absorb: each frontier vertex keeps 2*alpha/(1+alpha) of its residual as score.
      for(vertex_t i = 0; i < f_size; i++) {
        vertex_t node_idx = f[i];
        p[node_idx] += (2 * alpha) / (1 + alpha) * r[node_idx];
        r_prime[node_idx] = 0;
      }

      // Push: spread the remaining residual evenly to neighbors.
      for(vertex_t i = 0; i < f_size; i++) {
        vertex_t src_idx = f[i];
        vertex_t deg = degrees[src_idx];
        vertex_t offset = rowptr[src_idx];
        weight_t inv_r_deg = r[src_idx] / deg;

        for(vertex_t j = 0; j < deg; j++) {
          vertex_t dst_idx = columns[offset + j];
          weight_t update = ((1 - alpha) / (1 + alpha)) * inv_r_deg;

          weight_t oldval = r_prime[dst_idx];
          weight_t newval = r_prime[dst_idx] + update;
          weight_t thresh = degrees[dst_idx] * epsilon;

          r_prime[dst_idx] = newval;

          // A vertex joins the next frontier when its residual crosses deg * epsilon.
          if((oldval < thresh) && (newval >= thresh)) {
            f_prime[f_prime_size] = dst_idx;
            f_prime_size++;
          }
        }
      }

      memcpy(r, r_prime, n_nodes * sizeof(weight_t));

      // Swap frontiers for the next iteration.
      vertex_t* tmp_ptr = f;
      f = f_prime;
      f_size = f_prime_size;
      f_prime = tmp_ptr;
      f_prime_size = 0;
    }
  }

  auto t_stop = high_resolution_clock::now();
  auto elapsed = duration_cast<microseconds>(t_stop - t_start).count();

  free(r);
  free(r_prime);
  free(f);
  free(f_prime);
  free(degrees);

  return (float)elapsed / 1000;
}

template <typename val_t>
int compute_error(thrust::device_vector<val_t> _gpu_result,
                  thrust::host_vector<val_t> cpu_result) {
  thrust::host_vector<val_t> gpu_result(_gpu_result);

  int n_errors = 0;
  for (int i = 0; i < cpu_result.size(); i++) {
    if (abs(gpu_result[i] - cpu_result[i]) > 1e-6) {
      n_errors++;
    }
  }
  return n_errors;
}

} // namespace ppr_cpu
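
The reference above is the sequential PR-Nibble push procedure: a frontier vertex u keeps 2*alpha/(1+alpha) of its residual r[u] as score, spreads (1-alpha)/(1+alpha) * r[u]/deg(u) to each neighbor, and a neighbor joins the next frontier once its residual reaches deg * epsilon. Below is a minimal, self-contained C++ walk-through of that rule on a made-up 3-vertex path graph (0-1-2); the CSR arrays, the single seed, and the plain std::vector buffers are assumptions for illustration only, independent of gunrock's types and of the GPU operators in this PR.

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Hypothetical undirected path graph 0 - 1 - 2 in CSR form.
  std::vector<int> rowptr  = {0, 1, 3, 4};
  std::vector<int> columns = {1, 0, 2, 1};
  const float alpha = 0.15f, epsilon = 1e-6f;
  const int n = 3, seed = 0;

  std::vector<float> p(n, 0.0f), r(n, 0.0f), r_prime(n, 0.0f);
  std::vector<int> frontier = {seed};
  r[seed] = r_prime[seed] = 1.0f;

  while (!frontier.empty()) {
    // Absorb: each frontier vertex keeps 2*alpha/(1+alpha) of its residual as score.
    for (int u : frontier) {
      p[u] += (2 * alpha) / (1 + alpha) * r[u];
      r_prime[u] = 0.0f;
    }
    // Push: the rest of the residual is split evenly among the neighbors.
    std::vector<int> next;
    for (int u : frontier) {
      int deg = rowptr[u + 1] - rowptr[u];
      float share = ((1 - alpha) / (1 + alpha)) * r[u] / deg;
      for (int e = rowptr[u]; e < rowptr[u + 1]; e++) {
        int v = columns[e];
        float oldval = r_prime[v];
        r_prime[v] += share;
        int deg_v = rowptr[v + 1] - rowptr[v];
        // v becomes active once its residual crosses deg(v) * epsilon.
        if (oldval < deg_v * epsilon && r_prime[v] >= deg_v * epsilon)
          next.push_back(v);
      }
    }
    r = r_prime;
    frontier = std::move(next);
  }

  for (int v = 0; v < n; v++)
    std::printf("p[%d] = %f\n", v, (double)p[v]);
}

Running this toy loop converges in a few dozen iterations because the pushed residual shrinks by a factor of (1-alpha)/(1+alpha) each round, which is the same termination argument that applies to the reference implementation above.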