Use simplified rmm::exec_policy(#331)

Updates libcudf to use the new, simplified rmm::exec_policy and include the new refactored headers rmm/exec_policy.hpp and rmm/device_vector.hpp The new exec_policy can be passed directly to Thrust, no longer any need to call rmm::exec_policy(stream)->on(stream). Depends on rapidsai/rmm#647 Authors: - Mark Harris <mharris@nvidia.com> Approvers: - Paul Taylor - Christopher Harris URL: #331
rapidsai · Dec 10, 2020 · a1f7120 · a1f7120
1 parent dc725f8
commit a1f7120
Show file tree

Hide file tree

Showing 19 changed files with 118 additions and 122 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,8 @@
 
 - PR #332 fix directed_hausdorff_distance's space_offsets name + documentation
 
+- PR #331 Use simplified `rmm::exec_policy`
+
 ## Bug Fixes
 
 # cuSpatial 0.17.0 (Date TBD)

diff --git a/cpp/src/indexing/construction/detail/phase_1.cuh b/cpp/src/indexing/construction/detail/phase_1.cuh
@@ -27,6 +27,7 @@
 #include <rmm/thrust_rmm_allocator.h>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/functional.h>
 #include <thrust/iterator/constant_iterator.h>
@@ -63,7 +64,7 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x,
                                       rmm::mr::device_memory_resource *mr)
 {
   rmm::device_uvector<uint32_t> keys(x.size(), stream);
-  thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::transform(rmm::exec_policy(stream),
                     make_zip_iterator(x.begin<T>(), y.begin<T>()),
                     make_zip_iterator(x.begin<T>(), y.begin<T>()) + x.size(),
                     keys.begin(),
@@ -79,15 +80,13 @@ compute_point_keys_and_sorted_indices(cudf::column_view const &x,
 
   auto indices = make_fixed_width_column<uint32_t>(keys.size(), stream, mr);
 
-  thrust::sequence(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::sequence(rmm::exec_policy(stream),
                    indices->mutable_view().begin<uint32_t>(),
                    indices->mutable_view().end<uint32_t>());
 
   // Sort the codes and point indices
-  thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()),
-                             keys.begin(),
-                             keys.end(),
-                             indices->mutable_view().begin<int32_t>());
+  thrust::stable_sort_by_key(
+    rmm::exec_policy(stream), keys.begin(), keys.end(), indices->mutable_view().begin<int32_t>());
 
   return std::make_pair(std::move(keys), std::move(indices));
 }
@@ -110,7 +109,7 @@ inline cudf::size_type build_tree_level(InputIterator1 keys_begin,
                                         BinaryOp binary_op,
                                         rmm::cuda_stream_view stream)
 {
-  auto result = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream.value()),
+  auto result = thrust::reduce_by_key(rmm::exec_policy(stream),
                                       keys_begin,
                                       keys_end,
                                       vals_in,
@@ -207,19 +206,19 @@ reverse_tree_levels(rmm::device_uvector<uint32_t> const &quad_keys_in,
     cudf::size_type level_end   = end_pos[level];
     cudf::size_type level_begin = begin_pos[level];
     cudf::size_type num_quads   = level_end - level_begin;
-    thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::fill(rmm::exec_policy(stream),
                  quad_levels.begin() + offset,
                  quad_levels.begin() + offset + num_quads,
                  level);
-    thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::copy(rmm::exec_policy(stream),
                  quad_keys_in.begin() + level_begin,
                  quad_keys_in.begin() + level_end,
                  quad_keys.begin() + offset);
-    thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::copy(rmm::exec_policy(stream),
                  quad_point_count_in.begin() + level_begin,
                  quad_point_count_in.begin() + level_end,
                  quad_point_count.begin() + offset);
-    thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::copy(rmm::exec_policy(stream),
                  quad_child_count_in.begin() + level_begin,
                  quad_child_count_in.begin() + level_end,
                  quad_child_count.begin() + offset);
@@ -293,10 +292,7 @@ inline auto make_full_levels(cudf::column_view const &x,
   quad_child_count.resize(num_bottom_quads * (max_depth + 1), stream);
 
   // Zero out the quad_child_count vector because we're reusing the point_keys vector
-  thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
-               quad_child_count.begin(),
-               quad_child_count.end(),
-               0);
+  thrust::fill(rmm::exec_policy(stream), quad_child_count.begin(), quad_child_count.end(), 0);
 
   //
   // Compute "full" quads for the tree at each level. Starting from the quadrant

diff --git a/cpp/src/indexing/construction/detail/phase_2.cuh b/cpp/src/indexing/construction/detail/phase_2.cuh
@@ -25,6 +25,7 @@
 #include <rmm/thrust_rmm_allocator.h>
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <thrust/functional.h>
 #include <thrust/iterator/counting_iterator.h>
@@ -47,7 +48,7 @@ inline rmm::device_uvector<uint32_t> compute_leaf_positions(cudf::column_view co
                                                             rmm::cuda_stream_view stream)
 {
   rmm::device_uvector<uint32_t> leaf_pos(num_valid_nodes, stream);
-  auto result = thrust::copy_if(rmm::exec_policy(stream)->on(stream.value()),
+  auto result = thrust::copy_if(rmm::exec_policy(stream),
                                 thrust::make_counting_iterator(0),
                                 thrust::make_counting_iterator(0) + num_valid_nodes,
                                 indicator.begin<bool>(),
@@ -70,7 +71,7 @@ inline rmm::device_uvector<uint32_t> flatten_point_keys(
   rmm::device_uvector<uint32_t> flattened_keys(num_valid_nodes, stream);
   auto keys_and_levels =
     make_zip_iterator(quad_keys.begin(), quad_level.begin(), indicator.begin<bool>());
-  thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::transform(rmm::exec_policy(stream),
                     keys_and_levels,
                     keys_and_levels + num_valid_nodes,
                     flattened_keys.begin(),
@@ -107,24 +108,23 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
       flatten_point_keys(quad_keys, quad_level, indicator, num_valid_nodes, max_depth, stream);
 
     rmm::device_uvector<uint32_t> initial_sort_indices(num_valid_nodes, stream);
-    thrust::sequence(rmm::exec_policy(stream)->on(stream.value()),
-                     initial_sort_indices.begin(),
-                     initial_sort_indices.end());
+    thrust::sequence(
+      rmm::exec_policy(stream), initial_sort_indices.begin(), initial_sort_indices.end());
 
     rmm::device_uvector<uint32_t> quad_point_count_tmp(num_valid_nodes, stream);
-    thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::copy(rmm::exec_policy(stream),
                  quad_point_count.begin(),
                  quad_point_count.end(),
                  quad_point_count_tmp.begin());
 
     // sort indices and temporary point counts
     thrust::stable_sort_by_key(
-      rmm::exec_policy(stream)->on(stream.value()),
+      rmm::exec_policy(stream),
       flattened_keys.begin(),
       flattened_keys.end(),
       make_zip_iterator(initial_sort_indices.begin(), quad_point_count_tmp.begin()));
 
-    thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::remove_if(rmm::exec_policy(stream),
                       quad_point_count_tmp.begin(),
                       quad_point_count_tmp.begin() + num_valid_nodes,
                       quad_point_count_tmp.begin(),
@@ -151,22 +151,22 @@ inline rmm::device_uvector<uint32_t> compute_flattened_first_point_positions(
 
   rmm::device_uvector<uint32_t> quad_point_offsets_tmp(leaf_offsets.size(), stream);
 
-  thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::exclusive_scan(rmm::exec_policy(stream),
                          quad_point_count_tmp.begin(),
                          quad_point_count_tmp.end(),
                          quad_point_offsets_tmp.begin());
 
   auto counts_and_offsets =
     make_zip_iterator(quad_point_count_tmp.begin(), quad_point_offsets_tmp.begin());
 
-  thrust::stable_sort_by_key(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::stable_sort_by_key(rmm::exec_policy(stream),
                              initial_sort_indices.begin(),
                              initial_sort_indices.end(),
                              counts_and_offsets);
 
   rmm::device_uvector<uint32_t> quad_point_offsets(num_valid_nodes, stream);
 
-  thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::scatter(rmm::exec_policy(stream),
                   counts_and_offsets,
                   counts_and_offsets + leaf_offsets.size(),
                   leaf_offsets.begin(),
@@ -188,15 +188,14 @@ inline rmm::device_uvector<uint32_t> compute_parent_positions(
   auto parent_pos = [&]() {
     rmm::device_uvector<uint32_t> position_map(num_parent_nodes, stream);
     // line 1 of algorithm in Fig. 5 in ref.
-    thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::exclusive_scan(rmm::exec_policy(stream),
                            quad_child_count.begin(),
                            quad_child_count.begin() + num_parent_nodes,
                            position_map.begin());
     // line 2 of algorithm in Fig. 5 in ref.
     rmm::device_uvector<uint32_t> parent_pos(num_child_nodes, stream);
-    thrust::uninitialized_fill(
-      rmm::exec_policy(stream)->on(stream.value()), parent_pos.begin(), parent_pos.end(), 0);
-    thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::uninitialized_fill(rmm::exec_policy(stream), parent_pos.begin(), parent_pos.end(), 0);
+    thrust::scatter(rmm::exec_policy(stream),
                     thrust::make_counting_iterator(0),
                     thrust::make_counting_iterator(0) + num_parent_nodes,
                     position_map.begin(),
@@ -205,7 +204,7 @@ inline rmm::device_uvector<uint32_t> compute_parent_positions(
   }();
 
   // line 3 of algorithm in Fig. 5 in ref.
-  thrust::inclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::inclusive_scan(rmm::exec_policy(stream),
                          parent_pos.begin(),
                          parent_pos.begin() + num_child_nodes,
                          parent_pos.begin(),
@@ -249,7 +248,7 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
   // Start counting nodes at level 2, since children of the root node should not
   // be discarded.
   auto num_invalid_parent_nodes =
-    thrust::count_if(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::count_if(rmm::exec_policy(stream),
                      parent_point_counts,
                      parent_point_counts + (num_parent_nodes - level_1_size),
                      // i.e. quad_point_count[parent_pos] <= min_size
@@ -268,7 +267,7 @@ inline std::pair<uint32_t, uint32_t> remove_unqualified_quads(
                                 quad_levels.begin() + level_1_size);
 
   auto last_valid =
-    thrust::remove_if(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::remove_if(rmm::exec_policy(stream),
                       tree,
                       tree + num_child_nodes,
                       parent_point_counts,
@@ -314,14 +313,14 @@ inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
   auto is_quad = make_fixed_width_column<bool>(num_valid_nodes, stream, mr);
 
   // line 6 of algorithm in Fig. 5 in ref.
-  thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::transform(rmm::exec_policy(stream),
                     quad_point_count.begin(),
                     quad_point_count.begin() + num_parent_nodes,
                     is_quad->mutable_view().begin<bool>(),
                     thrust::placeholders::_1 > min_size);
 
   // line 7 of algorithm in Fig. 5 in ref.
-  thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::replace_if(rmm::exec_policy(stream),
                      quad_point_count.begin(),
                      quad_point_count.begin() + num_parent_nodes,
                      is_quad->view().begin<bool>(),
@@ -331,7 +330,7 @@ inline std::unique_ptr<cudf::column> construct_non_leaf_indicator(
   if (num_valid_nodes > num_parent_nodes) {
     // zero-fill the rest of the indicator column because
     // device_memory_resources aren't required to initialize allocations
-    thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::fill(rmm::exec_policy(stream),
                  is_quad->mutable_view().begin<bool>() + num_parent_nodes,
                  is_quad->mutable_view().end<bool>(),
                  0);

diff --git a/cpp/src/indexing/construction/point_quadtree.cu b/cpp/src/indexing/construction/point_quadtree.cu
@@ -25,6 +25,7 @@
 
 #include <rmm/cuda_stream_view.hpp>
 #include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
 
 /*
  * quadtree indexing on points using the bottom-up algorithm described at ref.
@@ -54,7 +55,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
                                                    rmm::mr::device_memory_resource *mr)
 {
   // count the number of child nodes
-  auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream)->on(stream.value()),
+  auto num_child_nodes = thrust::reduce(rmm::exec_policy(stream),
                                         quad_child_count.begin(),
                                         quad_child_count.begin() + num_parent_nodes);
 
@@ -91,15 +92,15 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
 
     auto quad_child_pos = make_fixed_width_column<uint32_t>(num_valid_nodes, stream, mr);
     // line 9 of algorithm in Fig. 5 in ref.
-    thrust::replace_if(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::replace_if(rmm::exec_policy(stream),
                        quad_child_count.begin(),
                        quad_child_count.begin() + num_valid_nodes,
                        is_quad->view().begin<uint8_t>(),
                        !thrust::placeholders::_1,
                        0);
 
     // line 10 of algorithm in Fig. 5 in ref.
-    thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::exclusive_scan(rmm::exec_policy(stream),
                            quad_child_count.begin(),
                            quad_child_count.end(),
                            quad_child_pos->mutable_view().begin<uint32_t>(),
@@ -112,7 +113,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
 
     // for each value in `is_quad` copy from `quad_child_pos` if true, else
     // `quad_point_pos`
-    thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::transform(rmm::exec_policy(stream),
                       offsets_iter,
                       offsets_iter + num_valid_nodes,
                       offsets->mutable_view().template begin<uint32_t>(),
@@ -131,7 +132,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
   auto lengths_iter = make_zip_iterator(is_quad->view().begin<bool>(),  //
                                         quad_child_count.begin(),
                                         quad_point_count.begin());
-  thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::transform(rmm::exec_policy(stream),
                     lengths_iter,
                     lengths_iter + num_valid_nodes,
                     lengths->mutable_view().template begin<uint32_t>(),
@@ -144,7 +145,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
   auto keys = make_fixed_width_column<uint32_t>(num_valid_nodes, stream, mr);
 
   // Copy quad keys to keys output column
-  thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::copy(rmm::exec_policy(stream),
                quad_keys.begin(),
                quad_keys.end(),
                keys->mutable_view().begin<uint32_t>());
@@ -153,7 +154,7 @@ inline std::unique_ptr<cudf::table> make_quad_tree(rmm::device_uvector<uint32_t>
   auto levels = make_fixed_width_column<uint8_t>(num_valid_nodes, stream, mr);
 
   // Copy quad levels to levels output column
-  thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::copy(rmm::exec_policy(stream),
                quad_levels.begin(),
                quad_levels.end(),
                levels->mutable_view().begin<uint8_t>());
@@ -185,31 +186,31 @@ inline std::unique_ptr<cudf::table> make_leaf_tree(
   auto offsets = make_fixed_width_column<uint32_t>(num_top_quads, stream, mr);
 
   // copy quad keys from the front of the quad_keys list
-  thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::copy(rmm::exec_policy(stream),
                quad_keys.begin(),
                quad_keys.begin() + num_top_quads,
                keys->mutable_view().begin<uint32_t>());
 
   // copy point counts from the front of the quad_point_count list
-  thrust::copy(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::copy(rmm::exec_policy(stream),
                quad_point_count.begin(),
                quad_point_count.begin() + num_top_quads,
                lengths->mutable_view().begin<uint32_t>());
 
   // All leaves are children of the root node (level 0)
-  thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::fill(rmm::exec_policy(stream),
                levels->mutable_view().begin<uint8_t>(),
                levels->mutable_view().end<uint8_t>(),
                0);
 
   // Quad node indicators are false for leaf nodes
-  thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::fill(rmm::exec_policy(stream),
                is_quad->mutable_view().begin<bool>(),
                is_quad->mutable_view().end<bool>(),
                false);
 
   // compute offsets from lengths
-  thrust::exclusive_scan(rmm::exec_policy(stream)->on(stream.value()),
+  thrust::exclusive_scan(rmm::exec_policy(stream),
                          lengths->view().begin<uint32_t>(),
                          lengths->view().end<uint32_t>(),
                          offsets->mutable_view().begin<uint32_t>());

diff --git a/cpp/src/interpolate/cubic_spline.cu b/cpp/src/interpolate/cubic_spline.cu
@@ -24,6 +24,7 @@
 #include <cuspatial/error.hpp>
 
 #include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
 
 #include <cusparse.h>
 
@@ -48,7 +49,7 @@ struct parallel_search {
       curve_ids.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr);
     int32_t* p_result = result->mutable_view().data<int32_t>();
     thrust::for_each(
-      rmm::exec_policy(stream)->on(stream.value()),
+      rmm::exec_policy(stream),
       thrust::make_counting_iterator<int>(0),
       thrust::make_counting_iterator<int>(query_coords.size()),
       [p_t, p_curve_ids, p_prefixes, p_query_coords, p_result] __device__(int index) {
@@ -103,7 +104,7 @@ struct interpolate {
       cudf::make_numeric_column(t.type(), t.size(), cudf::mask_state::UNALLOCATED, stream, mr);
     T* p_result = result->mutable_view().data<T>();
     thrust::for_each(
-      rmm::exec_policy(stream)->on(stream.value()),
+      rmm::exec_policy(stream),
       thrust::make_counting_iterator<int>(0),
       thrust::make_counting_iterator<int>(t.size()),
       [p_t, p_ids, p_coef_indices, p_d3, p_d2, p_d1, p_d0, p_result] __device__(int index) {
@@ -150,7 +151,7 @@ struct coefficients_compute {
     T* p_d1                   = d1.data<T>();
     T* p_d0                   = d0.data<T>();
     thrust::for_each(
-      rmm::exec_policy(stream)->on(stream.value()),
+      rmm::exec_policy(stream),
       thrust::make_counting_iterator<int>(1),
       thrust::make_counting_iterator<int>(prefixes.size()),
       [p_t, p_y, p_prefixes, p_h, p_i, p_z, p_d3, p_d2, p_d1, p_d0] __device__(int index) {
@@ -214,7 +215,7 @@ struct compute_spline_tridiagonals {
     T* p_u                    = u.data<T>();
     T* p_h                    = h.data<T>();
     T* p_i                    = i.data<T>();
-    thrust::for_each(rmm::exec_policy(stream)->on(stream.value()),
+    thrust::for_each(rmm::exec_policy(stream),
                      thrust::make_counting_iterator<int>(1),
                      thrust::make_counting_iterator<int>(prefixes.size()),
                      [p_t, p_y, p_prefixes, p_d, p_dlu, p_u, p_h, p_i] __device__(int index) {