Skip to content

Commit

Permalink
Merge pull request #267 from elstehle/enh/catch2-device-reduce
Browse files Browse the repository at this point in the history
Port cub::DeviceReduce tests to Catch2
  • Loading branch information
elstehle authored Jul 26, 2023
2 parents 83d54b2 + 1048b42 commit 025364c
Show file tree
Hide file tree
Showing 11 changed files with 1,077 additions and 2,083 deletions.
4 changes: 2 additions & 2 deletions cub/test/bfloat16.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,15 @@ struct bfloat16_t
******************************************************************************/

/// Insert formatted \p bfloat16_t into the output stream
std::ostream& operator<<(std::ostream &out, const bfloat16_t &x)
inline std::ostream& operator<<(std::ostream &out, const bfloat16_t &x)
{
out << (float)x;
return out;
}


/// Insert formatted \p __nv_bfloat16 into the output stream
std::ostream& operator<<(std::ostream &out, const __nv_bfloat16 &x)
inline std::ostream& operator<<(std::ostream &out, const __nv_bfloat16 &x)
{
return out << bfloat16_t(x);
}
Expand Down
48 changes: 48 additions & 0 deletions cub/test/c2h/extended_types.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/******************************************************************************
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#pragma once

#ifndef TEST_HALF_T
#define TEST_HALF_T !_NVHPC_CUDA
#endif

#ifndef TEST_BF_T
#define TEST_BF_T !_NVHPC_CUDA
#endif

#ifdef TEST_HALF_T
#include <cuda_fp16.h>

#include "../half.h"
#endif

#ifdef TEST_BF_T
#include <cuda_bf16.h>

#include "../bfloat16.h"
#endif
9 changes: 9 additions & 0 deletions cub/test/c2h/generators.cu
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include <cstdint>

#include <c2h/custom_type.cuh>
#include <c2h/extended_types.cuh>
#include <c2h/generators.cuh>
#include <curand.h>
#include <fill_striped.cuh>
Expand Down Expand Up @@ -316,6 +317,14 @@ INSTANTIATE(std::int64_t);
INSTANTIATE(float);
INSTANTIATE(double);

#ifdef TEST_HALF_T
INSTANTIATE(half_t);
#endif

#ifdef TEST_BF_T
INSTANTIATE(bfloat16_t);
#endif

template <typename T, int VecItem>
struct vec_gen_helper_t;

Expand Down
238 changes: 238 additions & 0 deletions cub/test/catch2_test_device_reduce.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,238 @@
/******************************************************************************
* Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#include <cub/device/device_reduce.cuh>

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/constant_iterator.h>

#include <cstdint>

#include "catch2_test_device_reduce.cuh"

// Has to go after all cub headers. Otherwise, this test won't catch unused
// variables in cub kernels.
#include "c2h/custom_type.cuh"
#include "c2h/extended_types.cuh"
#include "catch2/catch.hpp"
#include "catch2_test_cdp_helper.h"
#include "catch2_test_helper.h"
#include <nv/target>

DECLARE_CDP_WRAPPER(cub::DeviceReduce::Reduce, device_reduce);
DECLARE_CDP_WRAPPER(cub::DeviceReduce::Sum, device_sum);
DECLARE_CDP_WRAPPER(cub::DeviceReduce::Min, device_min);
DECLARE_CDP_WRAPPER(cub::DeviceReduce::ArgMin, device_arg_min);
DECLARE_CDP_WRAPPER(cub::DeviceReduce::Max, device_max);
DECLARE_CDP_WRAPPER(cub::DeviceReduce::ArgMax, device_arg_max);

// %PARAM% TEST_CDP cdp 0:1
// %PARAM% TEST_TYPES types 0:1:2:3

// List of types to test
using custom_t = c2h::custom_type_t<c2h::accumulateable_t,
c2h::equal_comparable_t,
c2h::lexicographical_less_comparable_t,
c2h::lexicographical_greater_comparable_t>;

#if TEST_TYPES == 0
using full_type_list =
c2h::type_list<type_pair<std::uint8_t>, type_pair<std::int8_t, std::int32_t>>;
#elif TEST_TYPES == 1
using full_type_list = c2h::type_list<type_pair<std::int32_t>, type_pair<std::int64_t>>;
#elif TEST_TYPES == 2
using full_type_list = c2h::type_list<type_pair<uchar3>, type_pair<ulonglong4>>;
#elif TEST_TYPES == 3
// clang-format off
using full_type_list = c2h::type_list<
type_pair<custom_t>
#if TEST_HALF_T
, type_pair<half_t> // testing half
#endif
#if TEST_BF_T
, type_pair<bfloat16_t> // testing bf16
#endif
>;
// clang-format on
#endif

/**
* @brief Input data generation mode
*/
enum class gen_data_t : int
{
/// Uniform random data generation
GEN_TYPE_RANDOM,
/// Constant value as input data
GEN_TYPE_CONST
};

CUB_TEST("Device reduce works with all device interfaces", "[reduce][device]", full_type_list)
{
using params = params_t<TestType>;
using item_t = typename params::item_t;
using output_t = typename params::output_t;
using offset_t = int32_t;

constexpr int max_items = 5000000;
constexpr int min_items = 1;
constexpr int num_segments = 1;

// Generate the input sizes to test for
const int num_items = GENERATE_COPY(take(3, random(min_items, max_items)),
values({
min_items,
max_items,
}));

// Input data generation to test
const gen_data_t data_gen_mode = GENERATE_COPY(gen_data_t::GEN_TYPE_RANDOM,
gen_data_t::GEN_TYPE_CONST);

// Generate input data
thrust::device_vector<item_t> in_items(num_items);
if (data_gen_mode == gen_data_t::GEN_TYPE_RANDOM)
{
c2h::gen(CUB_SEED(2), in_items);
}
else
{
item_t default_constant{};
init_default_constant(default_constant);
thrust::fill(in_items.begin(), in_items.end(), default_constant);
}
auto d_in_it = thrust::raw_pointer_cast(in_items.data());

SECTION("reduce")
{
using op_t = cub::Sum;

// Binary reduction operator
auto reduction_op = unwrap_op(reference_extended_fp(d_in_it), op_t{});

// Prepare verification data
using accum_t = cub::detail::accumulator_t<op_t, output_t, item_t>;
output_t expected_result =
static_cast<output_t>(compute_single_problem_reference(in_items, reduction_op, accum_t{}));

// Run test
thrust::device_vector<output_t> out_result(num_segments);
auto d_out_it = thrust::raw_pointer_cast(out_result.data());
using init_t = cub::detail::value_t<decltype(unwrap_it(d_out_it))>;
device_reduce(unwrap_it(d_in_it), unwrap_it(d_out_it), num_items, reduction_op, init_t{});

// Verify result
REQUIRE(expected_result == out_result[0]);
}

// Skip DeviceReduce::Sum tests for extended floating-point types because of unbounded epsilon due
// to pseudo associativity of the addition operation over floating point numbers
#if TEST_TYPES != 3
SECTION("sum")
{
using op_t = cub::Sum;
using accum_t = cub::detail::accumulator_t<op_t, output_t, item_t>;

// Prepare verification data
output_t expected_result =
static_cast<output_t>(compute_single_problem_reference(in_items, op_t{}, accum_t{}));

// Run test
thrust::device_vector<output_t> out_result(num_segments);
auto d_out_it = unwrap_it(thrust::raw_pointer_cast(out_result.data()));
device_sum(d_in_it, d_out_it, num_items);

// Verify result
REQUIRE(expected_result == out_result[0]);
}
#endif

SECTION("min")
{
// Prepare verification data
thrust::host_vector<item_t> host_items(in_items);
auto expected_result = *std::min_element(host_items.cbegin(), host_items.cend());

// Run test
thrust::device_vector<output_t> out_result(num_segments);
auto d_out_it = thrust::raw_pointer_cast(out_result.data());
device_min(unwrap_it(d_in_it), unwrap_it(d_out_it), num_items);

// Verify result
REQUIRE(expected_result == out_result[0]);
}

SECTION("max")
{
// Prepare verification data
thrust::host_vector<item_t> host_items(in_items);
auto expected_result = *std::max_element(host_items.cbegin(), host_items.cend());

// Run test
thrust::device_vector<output_t> out_result(num_segments);
auto d_out_it = thrust::raw_pointer_cast(out_result.data());
device_max(unwrap_it(d_in_it), unwrap_it(d_out_it), num_items);

// Verify result
REQUIRE(expected_result == out_result[0]);
}

SECTION("argmax")
{
// Prepare verification data
thrust::host_vector<item_t> host_items(in_items);
auto expected_result = std::max_element(host_items.cbegin(), host_items.cend());

// Run test
using result_t = cub::KeyValuePair<int, output_t>;
thrust::device_vector<result_t> out_result(num_segments);
device_arg_max(d_in_it, thrust::raw_pointer_cast(out_result.data()), num_items);

// Verify result
result_t gpu_result = out_result[0];
REQUIRE(expected_result[0] == gpu_result.value);
REQUIRE((expected_result - host_items.cbegin()) == gpu_result.key);
}

SECTION("argmin")
{
// Prepare verification data
thrust::host_vector<item_t> host_items(in_items);
auto expected_result = std::min_element(host_items.cbegin(), host_items.cend());

// Run test
using result_t = cub::KeyValuePair<int, output_t>;
thrust::device_vector<result_t> out_result(num_segments);
device_arg_min(d_in_it, thrust::raw_pointer_cast(out_result.data()), num_items);

// Verify result
result_t gpu_result = out_result[0];
REQUIRE(expected_result[0] == gpu_result.value);
REQUIRE((expected_result - host_items.cbegin()) == gpu_result.key);
}
}
Loading

0 comments on commit 025364c

Please sign in to comment.