Skip to content

Commit

Permalink
Use simplified rmm::exec_policy(#331)
Browse files Browse the repository at this point in the history
Updates libcudf to use the new, simplified rmm::exec_policy and include the new refactored headers rmm/exec_policy.hpp and rmm/device_vector.hpp

The new exec_policy can be passed directly to Thrust, no longer any need to call rmm::exec_policy(stream)->on(stream).

Depends on rapidsai/rmm#647

Authors:
  - Mark Harris <mharris@nvidia.com>

Approvers:
  - Paul Taylor
  - Christopher Harris

URL: #331
  • Loading branch information
harrism authored Dec 10, 2020
1 parent dc725f8 commit a1f7120
Show file tree
Hide file tree
Showing 19 changed files with 118 additions and 122 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

- PR #332 fix directed_hausdorff_distance's space_offsets name + documentation

- PR #331 Use simplified `rmm::exec_policy`

## Bug Fixes

# cuSpatial 0.17.0 (Date TBD)
Expand Down
26 changes: 11 additions & 15 deletions cpp/src/indexing/construction/detail/phase_1.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/functional.h>
#include <thrust/iterator/constant_iterator.h>
Expand Down Expand Up @@ -63,7 +64,7 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x,
rmm::mr::device_memory_resource *mr)
{
rmm::device_uvector<uint32_t> keys(x.size(), stream);
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
make_zip_iterator(x.begin<T>(), y.begin<T>()),
make_zip_iterator(x.begin<T>(), y.begin<T>()) + x.size(),
keys.begin(),
Expand All @@ -79,15 +80,13 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x,

auto indices = make_fixed_width_column<uint32_t>(keys.size(), stream, mr);

thrust::sequence(rmm::exec_policy(stream)->on(stream.value()),
thrust::sequence(rmm::exec_policy(stream),
indices->mutable_view().begin<uint32_t>(),
indices->mutable_view().end<uint32_t>());

// Sort the codes and point indices
thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()),
keys.begin(),
keys.end(),
indices->mutable_view().begin<int32_t>());
thrust::stable_sort_by_key(
rmm::exec_policy(stream), keys.begin(), keys.end(), indices->mutable_view().begin<int32_t>());

return std::make_pair(std::move(keys), std::move(indices));
}
Expand All @@ -110,7 +109,7 @@ inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
BinaryOp binary_op,
rmm::cuda_stream_view stream)
{
auto result = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()),
auto result = thrust::reduce_by_key(rmm::exec_policy(stream),
keys_begin,
keys_end,
vals_in,
Expand Down Expand Up @@ -207,19 +206,19 @@ reverse_tree_levels(rmm::device_uvector<uint32_t> const &quad_keys_in,
cudf::size_type level_end = end_pos[level];
cudf::size_type level_begin = begin_pos[level];
cudf::size_type num_quads = level_end - level_begin;
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
thrust::fill(rmm::exec_policy(stream),
quad_levels.begin() + offset,
quad_levels.begin() + offset + num_quads,
level);
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_keys_in.begin() + level_begin,
quad_keys_in.begin() + level_end,
quad_keys.begin() + offset);
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_point_count_in.begin() + level_begin,
quad_point_count_in.begin() + level_end,
quad_point_count.begin() + offset);
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_child_count_in.begin() + level_begin,
quad_child_count_in.begin() + level_end,
quad_child_count.begin() + offset);
Expand Down Expand Up @@ -293,10 +292,7 @@ inline auto make_full_levels(cudf::column_view const &x,
quad_child_count.resize(num_bottom_quads * (max_depth + 1), stream);

// Zero out the quad_child_count vector because we're reusing the point_keys vector
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
quad_child_count.begin(),
quad_child_count.end(),
0);
thrust::fill(rmm::exec_policy(stream), quad_child_count.begin(), quad_child_count.end(), 0);

//
// Compute "full" quads for the tree at each level. Starting from the quadrant
Expand Down
41 changes: 20 additions & 21 deletions cpp/src/indexing/construction/detail/phase_2.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
Expand All @@ -47,7 +48,7 @@ inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view co
rmm::cuda_stream_view stream)
{
rmm::device_uvector<uint32_t> leaf_pos(num_valid_nodes, stream);
auto result = thrust::copy_if(rmm::exec_policy(stream)->on(stream.value()),
auto result = thrust::copy_if(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_valid_nodes,
indicator.begin<bool>(),
Expand All @@ -70,7 +71,7 @@ inline rmm::device_uvector<uint32_t> flatten_point_keys(
rmm::device_uvector<uint32_t> flattened_keys(num_valid_nodes, stream);
auto keys_and_levels =
make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin<bool>());
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
keys_and_levels,
keys_and_levels + num_valid_nodes,
flattened_keys.begin(),
Expand Down Expand Up @@ -107,24 +108,23 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
flatten_point_keys(quad_keys, quad_level, indicator, num_valid_nodes, max_depth, stream);

rmm::device_uvector<uint32_t> initial_sort_indices(num_valid_nodes, stream);
thrust::sequence(rmm::exec_policy(stream)->on(stream.value()),
initial_sort_indices.begin(),
initial_sort_indices.end());
thrust::sequence(
rmm::exec_policy(stream), initial_sort_indices.begin(), initial_sort_indices.end());

rmm::device_uvector<uint32_t> quad_point_count_tmp(num_valid_nodes, stream);
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.end(),
quad_point_count_tmp.begin());

// sort indices and temporary point counts
thrust::stable_sort_by_key(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
flattened_keys.begin(),
flattened_keys.end(),
make_zip_iterator(initial_sort_indices.begin(), quad_point_count_tmp.begin()));

thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::remove_if(rmm::exec_policy(stream),
quad_point_count_tmp.begin(),
quad_point_count_tmp.begin() + num_valid_nodes,
quad_point_count_tmp.begin(),
Expand All @@ -151,22 +151,22 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(

rmm::device_uvector<uint32_t> quad_point_offsets_tmp(leaf_offsets.size(), stream);

thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
thrust::exclusive_scan(rmm::exec_policy(stream),
quad_point_count_tmp.begin(),
quad_point_count_tmp.end(),
quad_point_offsets_tmp.begin());

auto counts_and_offsets =
make_zip_iterator(quad_point_count_tmp.begin(), quad_point_offsets_tmp.begin());

thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()),
thrust::stable_sort_by_key(rmm::exec_policy(stream),
initial_sort_indices.begin(),
initial_sort_indices.end(),
counts_and_offsets);

rmm::device_uvector<uint32_t> quad_point_offsets(num_valid_nodes, stream);

thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
thrust::scatter(rmm::exec_policy(stream),
counts_and_offsets,
counts_and_offsets + leaf_offsets.size(),
leaf_offsets.begin(),
Expand All @@ -188,15 +188,14 @@ inline rmm::device_uvector<uint32_t> compute_parent_positions(
auto parent_pos = [&]() {
rmm::device_uvector<uint32_t> position_map(num_parent_nodes, stream);
// line 1 of algorithm in Fig. 5 in ref.
thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
thrust::exclusive_scan(rmm::exec_policy(stream),
quad_child_count.begin(),
quad_child_count.begin() + num_parent_nodes,
position_map.begin());
// line 2 of algorithm in Fig. 5 in ref.
rmm::device_uvector<uint32_t> parent_pos(num_child_nodes, stream);
thrust::uninitialized_fill(
rmm::exec_policy(stream)->on(stream.value()), parent_pos.begin(), parent_pos.end(), 0);
thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
thrust::uninitialized_fill(rmm::exec_policy(stream), parent_pos.begin(), parent_pos.end(), 0);
thrust::scatter(rmm::exec_policy(stream),
thrust::make_counting_iterator(0),
thrust::make_counting_iterator(0) + num_parent_nodes,
position_map.begin(),
Expand All @@ -205,7 +204,7 @@ inline rmm::device_uvector<uint32_t> compute_parent_positions(
}();

// line 3 of algorithm in Fig. 5 in ref.
thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
thrust::inclusive_scan(rmm::exec_policy(stream),
parent_pos.begin(),
parent_pos.begin() + num_child_nodes,
parent_pos.begin(),
Expand Down Expand Up @@ -249,7 +248,7 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
// Start counting nodes at level 2, since children of the root node should not
// be discarded.
auto num_invalid_parent_nodes =
thrust::count_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::count_if(rmm::exec_policy(stream),
parent_point_counts,
parent_point_counts + (num_parent_nodes - level_1_size),
// i.e. quad_point_count[parent_pos] <= min_size
Expand All @@ -268,7 +267,7 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
quad_levels.begin() + level_1_size);

auto last_valid =
thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::remove_if(rmm::exec_policy(stream),
tree,
tree + num_child_nodes,
parent_point_counts,
Expand Down Expand Up @@ -314,14 +313,14 @@ inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
auto is_quad = make_fixed_width_column<bool>(num_valid_nodes, stream, mr);

// line 6 of algorithm in Fig. 5 in ref.
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.begin() + num_parent_nodes,
is_quad->mutable_view().begin<bool>(),
thrust::placeholders::_1 > min_size);

// line 7 of algorithm in Fig. 5 in ref.
thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::replace_if(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.begin() + num_parent_nodes,
is_quad->view().begin<bool>(),
Expand All @@ -331,7 +330,7 @@ inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
if (num_valid_nodes > num_parent_nodes) {
// zero-fill the rest of the indicator column because
// device_memory_resources aren't required to initialize allocations
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
thrust::fill(rmm::exec_policy(stream),
is_quad->mutable_view().begin<bool>() + num_parent_nodes,
is_quad->mutable_view().end<bool>(),
0);
Expand Down
25 changes: 13 additions & 12 deletions cpp/src/indexing/construction/point_quadtree.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

/*
* quadtree indexing on points using the bottom-up algorithm described at ref.
Expand Down Expand Up @@ -54,7 +55,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
rmm::mr::device_memory_resource *mr)
{
// count the number of child nodes
auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream)->on(stream.value()),
auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream),
quad_child_count.begin(),
quad_child_count.begin() + num_parent_nodes);

Expand Down Expand Up @@ -91,15 +92,15 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>

auto quad_child_pos = make_fixed_width_column<uint32_t>(num_valid_nodes, stream, mr);
// line 9 of algorithm in Fig. 5 in ref.
thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::replace_if(rmm::exec_policy(stream),
quad_child_count.begin(),
quad_child_count.begin() + num_valid_nodes,
is_quad->view().begin<uint8_t>(),
!thrust::placeholders::_1,
0);

// line 10 of algorithm in Fig. 5 in ref.
thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
thrust::exclusive_scan(rmm::exec_policy(stream),
quad_child_count.begin(),
quad_child_count.end(),
quad_child_pos->mutable_view().begin<uint32_t>(),
Expand All @@ -112,7 +113,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>

// for each value in `is_quad` copy from `quad_child_pos` if true, else
// `quad_point_pos`
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
offsets_iter,
offsets_iter + num_valid_nodes,
offsets->mutable_view().template begin<uint32_t>(),
Expand All @@ -131,7 +132,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
auto lengths_iter = make_zip_iterator(is_quad->view().begin<bool>(), //
quad_child_count.begin(),
quad_point_count.begin());
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
lengths_iter,
lengths_iter + num_valid_nodes,
lengths->mutable_view().template begin<uint32_t>(),
Expand All @@ -144,7 +145,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
auto keys = make_fixed_width_column<uint32_t>(num_valid_nodes, stream, mr);

// Copy quad keys to keys output column
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_keys.begin(),
quad_keys.end(),
keys->mutable_view().begin<uint32_t>());
Expand All @@ -153,7 +154,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
auto levels = make_fixed_width_column<uint8_t>(num_valid_nodes, stream, mr);

// Copy quad levels to levels output column
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_levels.begin(),
quad_levels.end(),
levels->mutable_view().begin<uint8_t>());
Expand Down Expand Up @@ -185,31 +186,31 @@ inline std::unique_ptr<cudf::table> make_leaf_tree(
auto offsets = make_fixed_width_column<uint32_t>(num_top_quads, stream, mr);

// copy quad keys from the front of the quad_keys list
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_keys.begin(),
quad_keys.begin() + num_top_quads,
keys->mutable_view().begin<uint32_t>());

// copy point counts from the front of the quad_point_count list
thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy(rmm::exec_policy(stream),
quad_point_count.begin(),
quad_point_count.begin() + num_top_quads,
lengths->mutable_view().begin<uint32_t>());

// All leaves are children of the root node (level 0)
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
thrust::fill(rmm::exec_policy(stream),
levels->mutable_view().begin<uint8_t>(),
levels->mutable_view().end<uint8_t>(),
0);

// Quad node indicators are false for leaf nodes
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
thrust::fill(rmm::exec_policy(stream),
is_quad->mutable_view().begin<bool>(),
is_quad->mutable_view().end<bool>(),
false);

// compute offsets from lengths
thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
thrust::exclusive_scan(rmm::exec_policy(stream),
lengths->view().begin<uint32_t>(),
lengths->view().end<uint32_t>(),
offsets->mutable_view().begin<uint32_t>());
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/interpolate/cubic_spline.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cuspatial/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

#include <cusparse.h>

Expand All @@ -48,7 +49,7 @@ struct parallel_search {
curve_ids.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr);
int32_t* p_result = result->mutable_view().data<int32_t>();
thrust::for_each(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
thrust::make_counting_iterator<int>(0),
thrust::make_counting_iterator<int>(query_coords.size()),
[p_t, p_curve_ids, p_prefixes, p_query_coords, p_result] __device__(int index) {
Expand Down Expand Up @@ -103,7 +104,7 @@ struct interpolate {
cudf::make_numeric_column(t.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr);
T* p_result = result->mutable_view().data<T>();
thrust::for_each(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
thrust::make_counting_iterator<int>(0),
thrust::make_counting_iterator<int>(t.size()),
[p_t, p_ids, p_coef_indices, p_d3, p_d2, p_d1, p_d0, p_result] __device__(int index) {
Expand Down Expand Up @@ -150,7 +151,7 @@ struct coefficients_compute {
T* p_d1 = d1.data<T>();
T* p_d0 = d0.data<T>();
thrust::for_each(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
thrust::make_counting_iterator<int>(1),
thrust::make_counting_iterator<int>(prefixes.size()),
[p_t, p_y, p_prefixes, p_h, p_i, p_z, p_d3, p_d2, p_d1, p_d0] __device__(int index) {
Expand Down Expand Up @@ -214,7 +215,7 @@ struct compute_spline_tridiagonals {
T* p_u = u.data<T>();
T* p_h = h.data<T>();
T* p_i = i.data<T>();
thrust::for_each(rmm::exec_policy(stream)->on(stream.value()),
thrust::for_each(rmm::exec_policy(stream),
thrust::make_counting_iterator<int>(1),
thrust::make_counting_iterator<int>(prefixes.size()),
[p_t, p_y, p_prefixes, p_d, p_dlu, p_u, p_h, p_i] __device__(int index) {
Expand Down
Loading

0 comments on commit a1f7120

Please sign in to comment.