Merge pull request #3752 from rapidsai/branch-23.08
Forward-merge branch-23.08 to branch-23.10
GPUtester authored Jul 27, 2023
2 parents 8a85705 + ad74817 commit 4951f04
Showing 15 changed files with 2,048 additions and 61 deletions.
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -270,6 +270,7 @@ set(CUGRAPH_SOURCES
src/traversal/bfs_sg.cu
src/traversal/bfs_mg.cu
src/traversal/sssp_sg.cu
src/traversal/od_shortest_distances_sg.cu
src/traversal/sssp_mg.cu
src/link_analysis/hits_sg.cu
src/link_analysis/hits_mg.cu
39 changes: 39 additions & 0 deletions cpp/include/cugraph/algorithms.hpp
@@ -1179,6 +1179,45 @@ void sssp(raft::handle_t const& handle,
weight_t cutoff = std::numeric_limits<weight_t>::max(),
bool do_expensive_check = false);

/**
* @brief Compute the shortest distances from the given origins to all the given destinations.
*
* This algorithm is designed for large-diameter graphs. For small-diameter graphs, running the
* cugraph::sssp function in a sequentially executed loop might be faster. This algorithm currently
* works only for single-GPU (we are not aware of large-diameter graphs that won't fit in a single
* GPU).
*
* @throws cugraph::logic_error on erroneous input arguments.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weights. Needs to be a floating point type.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object.
* @param edge_weight_view View object holding edge weights for @p graph_view.
* @param origins An array of origins (starting vertices) to find shortest distances. There should
* be no duplicates in @p origins.
* @param destinations An array of destinations (end vertices) to find shortest distances. There
* should be no duplicates in @p destinations.
* @param cutoff Any destinations farther than @p cutoff will be marked as unreachable.
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return A vector of size @p origins.size() * @p destinations.size(). The i'th element of the
* returned vector is the shortest distance from the (i / @p destinations.size())'th origin to the
* (i % @p destinations.size())'th destination.
*/
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
rmm::device_uvector<weight_t> od_shortest_distances(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
raft::device_span<vertex_t const> origins,
raft::device_span<vertex_t const> destinations,
weight_t cutoff = std::numeric_limits<weight_t>::max(),
bool do_expensive_check = false);
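
A minimal calling sketch for the new API (not part of this diff; handle, graph_view, edge_weight_view, and the device vectors d_origins/d_destinations are assumed to already exist):

// Sketch only: compute all origin/destination shortest distances.
auto od_distances = cugraph::od_shortest_distances(
  handle,
  graph_view,
  edge_weight_view,
  raft::device_span<vertex_t const>(d_origins.data(), d_origins.size()),
  raft::device_span<vertex_t const>(d_destinations.data(), d_destinations.size()));
// Row-major layout over origins: the distance from the i'th origin to the j'th
// destination is od_distances[i * d_destinations.size() + j] (still in device memory).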

/**
* @brief Compute PageRank scores.
*
44 changes: 26 additions & 18 deletions cpp/include/cugraph/edge_partition_device_view.cuh
@@ -156,13 +156,15 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
{
}

size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
template <typename MajorIterator>
size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return dcs_nzd_vertices_ ? thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
detail::local_degree_op_t<
vertex_t,
edge_t,
@@ -176,8 +178,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
thrust::plus<size_t>())
: thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
detail::local_degree_op_t<
vertex_t,
edge_t,
@@ -217,15 +219,17 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
return local_degrees;
}

rmm::device_uvector<edge_t> compute_local_degrees(raft::device_span<vertex_t const> majors,
template <typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(majors.size(), stream);
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
if (dcs_nzd_vertices_) {
assert(major_hypersparse_first_);
thrust::transform(rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
local_degrees.begin(),
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, true>{
this->offsets_,
Expand All @@ -235,8 +239,8 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
} else {
thrust::transform(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
local_degrees.begin(),
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
this->offsets_, major_range_first_, std::byte{0} /* dummy */, std::byte{0} /* dummy */});
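
Templating compute_number_of_edges and compute_local_degrees on MajorIterator lets callers pass any random-access iterator (a raw pointer, or a thrust counting/transform iterator) instead of first materializing a raft::device_span of majors. A hedged sketch of a call site (accessor names such as major_range_first()/major_range_last() are assumed here, not shown in this diff):

// Sketch only: count the edges covered by every major in this partition without
// building an intermediate array of vertex ids.
auto num_edges = edge_partition.compute_number_of_edges(
  thrust::make_counting_iterator(edge_partition.major_range_first()),
  thrust::make_counting_iterator(edge_partition.major_range_last()),
  handle.get_stream());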
@@ -349,13 +353,15 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
{
}

size_t compute_number_of_edges(raft::device_span<vertex_t const> majors,
template <typename MajorIterator>
size_t compute_number_of_edges(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
return thrust::transform_reduce(
rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
detail::local_degree_op_t<vertex_t,
edge_t,
size_t /* no limit on majors.size(), so edge_t can overflow */,
@@ -383,13 +389,15 @@ class edge_partition_device_view_t<vertex_t, edge_t, multi_gpu, std::enable_if_t
return local_degrees;
}

rmm::device_uvector<edge_t> compute_local_degrees(raft::device_span<vertex_t const> majors,
template <typename MajorIterator>
rmm::device_uvector<edge_t> compute_local_degrees(MajorIterator major_first,
MajorIterator major_last,
rmm::cuda_stream_view stream) const
{
rmm::device_uvector<edge_t> local_degrees(majors.size(), stream);
rmm::device_uvector<edge_t> local_degrees(thrust::distance(major_first, major_last), stream);
thrust::transform(rmm::exec_policy(stream),
majors.begin(),
majors.end(),
major_first,
major_last,
local_degrees.begin(),
detail::local_degree_op_t<vertex_t, edge_t, edge_t, multi_gpu, false>{
this->offsets_,
16 changes: 16 additions & 0 deletions cpp/include/cugraph/utilities/dataframe_buffer.hpp
@@ -99,6 +99,22 @@ auto allocate_dataframe_buffer(size_t buffer_size, rmm::cuda_stream_view stream_
std::make_index_sequence<tuple_size>(), buffer_size, stream_view);
}

template <typename BufferType>
void reserve_dataframe_buffer(BufferType& buffer,
size_t new_buffer_capacity,
rmm::cuda_stream_view stream_view)
{
static_assert(is_std_tuple_of_arithmetic_vectors<std::remove_cv_t<BufferType>>::value ||
is_arithmetic_vector<std::remove_cv_t<BufferType>, rmm::device_uvector>::value);
if constexpr (is_std_tuple_of_arithmetic_vectors<std::remove_cv_t<BufferType>>::value) {
std::apply([new_buffer_capacity, stream_view](
auto&&... args) { (args.reserve(new_buffer_capacity, stream_view), ...); },
buffer);
} else {
buffer.reserve(new_buffer_capacity, stream_view);
}
}
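
The new reserve_dataframe_buffer mirrors the existing allocate/resize helpers: for a tuple-of-vectors buffer it reserves capacity on every column, and for a single rmm::device_uvector it forwards to reserve. A small usage sketch (context assumed, not part of this diff; handle is an existing raft::handle_t and vertex_t is the vertex id type):

// Sketch only: pre-reserve capacity so later growth does not trigger repeated reallocations.
auto buffer =
  allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(size_t{0}, handle.get_stream());
reserve_dataframe_buffer(buffer, size_t{1} << 20, handle.get_stream());  // capacity for ~1M rows
resize_dataframe_buffer(buffer, size_t{1} << 20, handle.get_stream());   // grows without reallocating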

template <typename BufferType>
void resize_dataframe_buffer(BufferType& buffer,
size_t new_buffer_size,
4 changes: 2 additions & 2 deletions cpp/src/components/weakly_connected_components_impl.cuh
@@ -223,7 +223,7 @@ struct v_op_t {
decltype(thrust::make_zip_iterator(thrust::make_tuple(
static_cast<vertex_type*>(nullptr), static_cast<vertex_type*>(nullptr)))) edge_buffer_first{};
// FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this requires
// placing the atomic barrier on managed memory and this adds additional complication.
// placing the atomic variable on managed memory and this adds additional complication.
size_t* num_edge_inserts{};
size_t bucket_idx_next{};
size_t bucket_idx_conflict{}; // relevant only if GraphViewType::is_multi_gpu is true
@@ -501,7 +501,7 @@ void weakly_connected_components_impl(raft::handle_t const& handle,
auto edge_buffer =
allocate_dataframe_buffer<thrust::tuple<vertex_t, vertex_t>>(0, handle.get_stream());
// FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this
// requires placing the atomic variable on managed memory and this make it less attractive.
// requires placing the atomic variable on managed memory and this adds additional complication.
rmm::device_scalar<size_t> num_edge_inserts(size_t{0}, handle.get_stream());

auto edge_dst_components =
4 changes: 1 addition & 3 deletions cpp/src/prims/detail/extract_transform_v_frontier_e.cuh
@@ -877,9 +877,7 @@ extract_transform_v_frontier_e(raft::handle_t const& handle,

auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i);
auto max_pushes = edge_partition.compute_number_of_edges(
raft::device_span<vertex_t const>(edge_partition_frontier_major_first,
edge_partition_frontier_major_last),
handle.get_stream());
edge_partition_frontier_major_first, edge_partition_frontier_major_last, handle.get_stream());

auto new_buffer_size = buffer_idx.value(handle.get_stream()) + max_pushes;
resize_optional_dataframe_buffer<output_key_t>(