Skip to content

Commit

Permalink
Reduce compile time/size for scan.cu (#7516)
Browse files Browse the repository at this point in the history
This PR reduces the number of calls to `inclusive_scan` and `exclusive_scan` by using a `null_replace_accessor` that allows non-nullable columns. This reduces the compile time and size of `scan.cu` by half. This PR also includes a scan gbenchmark that shows no change in performance from the original implementation.

Authors:
  - David (@davidwendt)

Approvers:
  - Paul Taylor (@trxcllnt)
  - Jake Hemstad (@jrhemstad)

URL: #7516
  • Loading branch information
davidwendt authored Mar 9, 2021
1 parent 4897a25 commit 444d9f2
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 60 deletions.
1 change: 1 addition & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ ConfigureBench(REDUCTION_BENCH
reduction/anyall_benchmark.cpp
reduction/dictionary_benchmark.cpp
reduction/reduce_benchmark.cpp
reduction/scan_benchmark.cpp
reduction/minmax_benchmark.cpp)

###################################################################################################
Expand Down
63 changes: 63 additions & 0 deletions cpp/benchmarks/reduction/scan_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <cudf/column/column.hpp>
#include <cudf/column/column_view.hpp>
#include <cudf/reduction.hpp>
#include <cudf/table/table.hpp>
#include <cudf/types.hpp>

class ReductionScan : public cudf::benchmark {
};

template <typename type>
static void BM_reduction_scan(benchmark::State& state, bool include_nulls)
{
cudf::size_type const n_rows{(cudf::size_type)state.range(0)};
auto const dtype = cudf::type_to_id<type>();
auto const table = create_random_table({dtype}, 1, row_count{n_rows});
if (!include_nulls) table->get_column(0).set_null_mask(rmm::device_buffer{}, 0);
cudf::column_view input(table->view().column(0));

for (auto _ : state) {
cuda_event_timer timer(state, true);
auto result = cudf::scan(input, cudf::make_min_aggregation(), cudf::scan_type::INCLUSIVE);
}
}

#define SCAN_BENCHMARK_DEFINE(name, type, nulls) \
BENCHMARK_DEFINE_F(ReductionScan, name) \
(::benchmark::State & state) { BM_reduction_scan<type>(state, nulls); } \
BENCHMARK_REGISTER_F(ReductionScan, name) \
->UseManualTime() \
->Arg(10000) /* 10k */ \
->Arg(100000) /* 100k */ \
->Arg(1000000) /* 1M */ \
->Arg(10000000) /* 10M */ \
->Arg(100000000); /* 100M */

SCAN_BENCHMARK_DEFINE(int8_no_nulls, int8_t, false);
SCAN_BENCHMARK_DEFINE(int32_no_nulls, int32_t, false);
SCAN_BENCHMARK_DEFINE(uint64_no_nulls, uint64_t, false);
SCAN_BENCHMARK_DEFINE(float_no_nulls, float, false);
SCAN_BENCHMARK_DEFINE(int16_nulls, int16_t, true);
SCAN_BENCHMARK_DEFINE(uint32_nulls, uint32_t, true);
SCAN_BENCHMARK_DEFINE(double_nulls, double, true);
48 changes: 28 additions & 20 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -71,41 +71,47 @@ inline auto make_counting_transform_iterator(cudf::size_type start, UnaryFunctio
}

/**
* @brief value accessor of column with null bitmask
* A unary functor returns scalar value at `id`.
* `operator() (cudf::size_type id)` computes `element` and valid flag at `id`
* This functor is only allowed for nullable columns.
* @brief Value accessor of column that may have a null bitmask.
*
* the return value for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
* This unary functor returns scalar value at `id`.
* The `operator()(cudf::size_type id)` computes the `element` and valid flag at `id`.
*
* @throws cudf::logic_error if the column is not nullable.
* @throws cudf::logic_error if column datatype and Element type mismatch.
* The return value for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
*
* @tparam Element The type of elements in the column
*/
template <typename Element>
struct null_replaced_value_accessor {
column_device_view const col; ///< column view of column in device
Element const null_replacement{}; ///< value returned when element is null
bool const has_nulls; ///< true if col has null elements

/**
* @brief constructor
* @param[in] _col column device view of cudf column
* @brief Creates an accessor for a null-replacement iterator.
*
* @throws cudf::logic_error if `col` type does not match Element type.
* @throws cudf::logic_error if `has_nulls` is true but `col` does not have a validity mask.
*
* @param[in] col column device view of cudf column
* @param[in] null_replacement The value to return for null elements
* @param[in] has_nulls Must be set to true if `col` has nulls.
*/
null_replaced_value_accessor(column_device_view const& _col, Element null_val)
: col{_col}, null_replacement{null_val}
null_replaced_value_accessor(column_device_view const& col,
Element null_val,
bool has_nulls = true)
: col{col}, null_replacement{null_val}, has_nulls{has_nulls}
{
CUDF_EXPECTS(data_type(type_to_id<Element>()) == col.type(), "the data type mismatch");
// verify valid is non-null, otherwise, is_valid_nocheck() will crash
CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column.");
CUDF_EXPECTS(type_to_id<Element>() == device_storage_type_id(col.type().id()),
"the data type mismatch");
// verify validity bitmask is non-null, otherwise, is_null_nocheck() will crash
if (has_nulls) CUDF_EXPECTS(col.nullable(), "column with nulls must have a validity bitmask");
}

CUDA_DEVICE_CALLABLE
Element operator()(cudf::size_type i) const
{
return col.is_valid_nocheck(i) ? col.element<Element>(i) : null_replacement;
return has_nulls && col.is_null_nocheck(i) ? null_replacement : col.element<Element>(i);
}
};

Expand Down Expand Up @@ -140,23 +146,25 @@ struct validity_accessor {
*
* Dereferencing the returned iterator for element `i` will return `column[i]`
* if it is valid, or `null_replacement` if it is null.
* This iterator is only allowed for nullable columns.
* This iterator is only allowed for both nullable and non-nullable columns.
*
* @throws cudf::logic_error if the column is not nullable.
* @throws cudf::logic_error if column datatype and Element type mismatch.
*
* @tparam Element The type of elements in the column
* @param column The column to iterate
* @param null_replacement The value to return for null elements
* @return auto Iterator that returns valid column elements, or a null
* @param has_nulls Must be set to true if `column` has nulls.
* @return Iterator that returns valid column elements, or a null
* replacement value for null elements.
*/
template <typename Element>
auto make_null_replacement_iterator(column_device_view const& column,
Element const null_replacement = Element{0})
Element const null_replacement = Element{0},
bool has_nulls = true)
{
return make_counting_transform_iterator(
0, null_replaced_value_accessor<Element>{column, null_replacement});
0, null_replaced_value_accessor<Element>{column, null_replacement, has_nulls});
}

/**
Expand Down
50 changes: 17 additions & 33 deletions cpp/src/reductions/scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,10 @@
#include <cudf/detail/iterator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/device_atomics.cuh>
#include <cudf/detail/utilities/device_operators.cuh>
#include <cudf/null_mask.hpp>
#include <cudf/reduction.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/span.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand All @@ -34,6 +33,7 @@

namespace cudf {
namespace detail {

/**
* @brief Dispatcher for running Scan operation on input column
* Dispatches scan operation on `Op` and creates output column
Expand Down Expand Up @@ -73,23 +73,14 @@ struct scan_dispatcher {
mutable_column_view output = output_column->mutable_view();
auto d_input = column_device_view::create(input_view, stream);

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});
} else {
auto input = d_input->begin<T>();
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});
}
auto input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::exclusive_scan(rmm::exec_policy(stream),
input,
input + size,
output.data<T>(),
Op::template identity<T>(),
Op{});

CHECK_CUDA(stream.value());
return output_column;
Expand Down Expand Up @@ -147,13 +138,9 @@ struct scan_dispatcher {
auto d_input = column_device_view::create(input_view, stream);
mutable_column_view output = output_column->mutable_view();

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
} else {
auto input = d_input->begin<T>();
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});
}
auto const input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, output.data<T>(), Op{});

CHECK_CUDA(stream.value());
return output_column;
Expand All @@ -171,13 +158,10 @@ struct scan_dispatcher {

auto d_input = column_device_view::create(input_view, stream);

if (input_view.has_nulls()) {
auto input = make_null_replacement_iterator(*d_input, Op::template identity<T>());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
} else {
auto input = d_input->begin<T>();
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});
}
auto input =
make_null_replacement_iterator(*d_input, Op::template identity<T>(), input_view.has_nulls());
thrust::inclusive_scan(rmm::exec_policy(stream), input, input + size, result.data(), Op{});

CHECK_CUDA(stream.value());

auto output_column =
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/iterator/value_iterator_test.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -343,7 +343,7 @@ TYPED_TEST(IteratorTest, error_handling)

CUDF_EXPECT_THROW_MESSAGE((cudf::detail::make_null_replacement_iterator(
*d_col_no_null, cudf::test::make_type_param_scalar<T>(0))),
"Unexpected non-nullable column.");
"column with nulls must have a validity bitmask");

CUDF_EXPECT_THROW_MESSAGE((d_col_no_null->pair_begin<T, true>()),
"Unexpected non-nullable column.");
Expand Down
29 changes: 24 additions & 5 deletions cpp/tests/reductions/scan_tests.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
* Copyright (c) 2019-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -509,8 +509,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanSum)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{1, 3, 6, 10}, scale};
auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({1, 2, 3, 0, 4, 0}, {1, 1, 1, 0, 1, 0}, scale);
auto const expected_nulls = fp_wrapper({1, 3, 6, 0, 10, 0}, {1, 1, 1, 0, 1, 0}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand All @@ -526,8 +531,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointPreScanSum)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{0, 1, 3, 6}, scale};
auto const result = cudf::scan(column, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({0, 1, 2, 3, 0, 4}, {0, 1, 1, 1, 0, 1}, scale);
auto const expected_nulls = fp_wrapper({0, 0, 1, 3, 0, 6}, {0, 1, 1, 1, 0, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_sum_aggregation(), scan_type::EXCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand Down Expand Up @@ -556,8 +566,13 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMin)
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const expected = fp_wrapper{{1, 1, 1, 1}, scale};
auto const result = cudf::scan(column, cudf::make_min_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), expected);

auto const with_nulls = fp_wrapper({1, 0, 2, 0, 3, 4}, {1, 0, 1, 0, 1, 1}, scale);
auto const expected_nulls = fp_wrapper({1, 0, 1, 0, 1, 1}, {1, 0, 1, 0, 1, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_min_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), expected_nulls);
}
}

Expand All @@ -572,7 +587,11 @@ TYPED_TEST(FixedPointTestBothReps, FixedPointScanMax)
auto const scale = scale_type{i};
auto const column = fp_wrapper{{1, 2, 3, 4}, scale};
auto const result = cudf::scan(column, cudf::make_max_aggregation(), scan_type::INCLUSIVE);

CUDF_TEST_EXPECT_COLUMNS_EQUAL(result->view(), column);

auto const with_nulls = fp_wrapper({1, 0, 0, 2, 3, 4}, {1, 0, 0, 1, 1, 1}, scale);
auto const result_nulls =
cudf::scan(with_nulls, cudf::make_max_aggregation(), scan_type::INCLUSIVE);
CUDF_TEST_EXPECT_COLUMNS_EQUAL(result_nulls->view(), with_nulls);
}
}

0 comments on commit 444d9f2

Please sign in to comment.