Skip to content

Commit

Permalink
Use device_uvector, device_span in sort groupby (#7523)
Browse the repository at this point in the history
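- Replace `rmm::device_vector` with `rmm::device_uvector` for the index and bitmask vectors in `sort_groupby_helper`
- Replace `rmm::device_vector<size_type> const&` parameters with `cudf::device_span<size_type const>` in the sort groupby functions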

Ref. #7387 (comment)

Authors:
  - Karthikeyan (@karthikeyann)

Approvers:
  - Mike Wilson (@hyperbolic2346)
  - David (@davidwendt)

URL: #7523
  • Loading branch information
karthikeyann authored Mar 15, 2021
1 parent 5fea6ad commit 325d5b8
Show file tree
Hide file tree
Showing 17 changed files with 110 additions and 96 deletions.
6 changes: 3 additions & 3 deletions cpp/include/cudf/detail/groupby/sort_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/device_uvector.hpp>

namespace cudf {
namespace groupby {
Expand All @@ -40,8 +40,8 @@ namespace sort {
* value column
*/
struct sort_groupby_helper {
using index_vector = rmm::device_vector<size_type>;
using bitmask_vector = rmm::device_vector<bitmask_type>;
using index_vector = rmm::device_uvector<size_type>;
using bitmask_vector = rmm::device_uvector<bitmask_type>;
using column_ptr = std::unique_ptr<column>;
using index_vector_ptr = std::unique_ptr<index_vector>;
using bitmask_vector_ptr = std::unique_ptr<bitmask_vector>;
Expand Down
19 changes: 10 additions & 9 deletions cpp/src/groupby/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -164,18 +164,19 @@ groupby::groups groupby::get_groups(table_view values, rmm::mr::device_memory_re
CUDF_FUNC_RANGE();
auto grouped_keys = helper().sorted_keys(rmm::cuda_stream_default, mr);

auto group_offsets = helper().group_offsets(0);
auto const& group_offsets = helper().group_offsets(rmm::cuda_stream_default);
std::vector<size_type> group_offsets_vector(group_offsets.size());
thrust::copy(group_offsets.begin(), group_offsets.end(), group_offsets_vector.begin());
thrust::copy(thrust::device_pointer_cast(group_offsets.begin()),
thrust::device_pointer_cast(group_offsets.end()),
group_offsets_vector.begin());

std::unique_ptr<table> grouped_values{nullptr};
if (values.num_columns()) {
grouped_values = cudf::detail::gather(values,
helper().key_sort_order(),
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
rmm::cuda_stream_default,
mr);
auto grouped_values = cudf::detail::gather(values,
helper().key_sort_order(),
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
rmm::cuda_stream_default,
mr);
return groupby::groups{
std::move(grouped_keys), std::move(group_offsets_vector), std::move(grouped_values)};
} else {
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/sort/group_argmax.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <groupby/sort/group_single_pass_reduction_util.cuh>

#include <cudf/detail/gather.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -27,7 +28,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_argmax(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
column_view const& key_sort_order,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/groupby/sort/group_argmin.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <groupby/sort/group_single_pass_reduction_util.cuh>

#include <cudf/detail/gather.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -27,7 +28,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_argmin(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
column_view const& key_sort_order,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_collect.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,21 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/detail/gather.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace groupby {
namespace detail {
std::unique_ptr<column> group_collect(column_view const &values,
rmm::device_vector<size_type> const &group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource *mr)
{
rmm::device_buffer offsets_data(
group_offsets.data().get(), group_offsets.size() * sizeof(cudf::size_type), stream, mr);
group_offsets.data(), group_offsets.size() * sizeof(cudf::size_type), stream, mr);

auto offsets = std::make_unique<cudf::column>(
cudf::data_type(cudf::type_to_id<cudf::size_type>()), num_groups + 1, std::move(offsets_data));
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_count.cu
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/iterator.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -29,7 +30,7 @@ namespace cudf {
namespace groupby {
namespace detail {
std::unique_ptr<column> group_count_valid(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -70,7 +71,7 @@ std::unique_ptr<column> group_count_valid(column_view const& values,
return result;
}

std::unique_ptr<column> group_count_all(rmm::device_vector<size_type> const& group_offsets,
std::unique_ptr<column> group_count_all(cudf::device_span<size_type const> group_offsets,
size_type num_groups,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/sort/group_max.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_max(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/sort/group_min.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_min(column_view const& values,
size_type num_groups,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/groupby/sort/group_nth_element.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <cudf/detail/gather.cuh>
#include <cudf/detail/iterator.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>

Expand All @@ -31,8 +32,8 @@ namespace groupby {
namespace detail {
std::unique_ptr<column> group_nth_element(column_view const &values,
column_view const &group_sizes,
rmm::device_vector<size_type> const &group_labels,
rmm::device_vector<size_type> const &group_offsets,
cudf::device_span<size_type const> group_labels,
cudf::device_span<size_type const> group_offsets,
size_type num_groups,
size_type n,
null_policy null_handling,
Expand Down
21 changes: 11 additions & 10 deletions cpp/src/groupby/sort/group_nunique.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/table/row_operators.cuh>
#include <cudf/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>
Expand All @@ -34,9 +35,9 @@ struct nunique_functor {
template <typename T>
typename std::enable_if_t<cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>>
operator()(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand All @@ -54,8 +55,8 @@ struct nunique_functor {
[v = *values_view,
equal,
null_handling,
group_offsets = group_offsets.data().get(),
group_labels = group_labels.data().get()] __device__(auto i) -> size_type {
group_offsets = group_offsets.data(),
group_labels = group_labels.data()] __device__(auto i) -> size_type {
bool is_input_countable =
(null_handling == null_policy::INCLUDE || v.is_valid_nocheck(i));
bool is_unique = is_input_countable &&
Expand All @@ -76,8 +77,8 @@ struct nunique_functor {
thrust::make_counting_iterator<size_type>(0),
[v = *values_view,
equal,
group_offsets = group_offsets.data().get(),
group_labels = group_labels.data().get()] __device__(auto i) -> size_type {
group_offsets = group_offsets.data(),
group_labels = group_labels.data()] __device__(auto i) -> size_type {
bool is_unique = group_offsets[group_labels[i]] == i || // first element or
(not equal.operator()<T>(i, i - 1)); // new unique value in sorted
return static_cast<size_type>(is_unique);
Expand All @@ -95,9 +96,9 @@ struct nunique_functor {
template <typename T>
typename std::enable_if_t<!cudf::is_equality_comparable<T, T>(), std::unique_ptr<column>>
operator()(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand All @@ -107,9 +108,9 @@ struct nunique_functor {
};
} // namespace
std::unique_ptr<column> group_nunique(column_view const& values,
rmm::device_vector<size_type> const& group_labels,
cudf::device_span<size_type const> group_labels,
size_type const num_groups,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
null_policy null_handling,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/groupby/sort/group_quantiles.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cudf/detail/aggregation/aggregation.hpp>
#include <cudf/dictionary/detail/iterator.cuh>
#include <cudf/dictionary/dictionary_column_view.hpp>
#include <cudf/utilities/span.hpp>

#include <groupby/sort/group_reductions.hpp>
#include <quantiles/quantiles_util.hpp>
Expand Down Expand Up @@ -77,7 +78,7 @@ struct quantiles_functor {
std::enable_if_t<std::is_arithmetic<T>::value, std::unique_ptr<column>> operator()(
column_view const& values,
column_view const& group_sizes,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type const num_groups,
rmm::device_vector<double> const& quantile,
interpolation interpolation,
Expand Down Expand Up @@ -110,7 +111,7 @@ struct quantiles_functor {
values_iter,
*group_size_view,
*result_view,
group_offsets.data().get(),
group_offsets.data(),
quantile.data().get(),
static_cast<size_type>(quantile.size()),
interpolation});
Expand All @@ -123,7 +124,7 @@ struct quantiles_functor {
values_iter,
*group_size_view,
*result_view,
group_offsets.data().get(),
group_offsets.data(),
quantile.data().get(),
static_cast<size_type>(quantile.size()),
interpolation});
Expand All @@ -145,7 +146,7 @@ struct quantiles_functor {
// TODO: add optional check for is_sorted. Use context.flag_sorted
std::unique_ptr<column> group_quantiles(column_view const& values,
column_view const& group_sizes,
rmm::device_vector<size_type> const& group_offsets,
cudf::device_span<size_type const> group_offsets,
size_type const num_groups,
std::vector<double> const& quantiles,
interpolation interp,
Expand Down
Loading

0 comments on commit 325d5b8

Please sign in to comment.