Skip to content

Commit

Permalink
Fix MSVC / CUB tests build
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed Oct 27, 2023
1 parent 4839078 commit 08fcae6
Show file tree
Hide file tree
Showing 11 changed files with 144 additions and 88 deletions.
6 changes: 0 additions & 6 deletions cub/cmake/CubBuildCompilerTargets.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,6 @@ function(cub_build_compiler_targets)
# Suppress overly-pedantic/unavoidable warnings brought in with /W4:
# C4324: structure was padded due to alignment specifier
append_option_if_available("/wd4324" cxx_compile_options)
# C4127: conditional expression is constant
# This can be fixed with `if constexpr` when available, but there's no way
# to silence these pre-C++17.
# TODO We should have per-dialect interface targets so we can leave these
# warnings enabled on C++17:
append_option_if_available("/wd4127" cxx_compile_options)
# C4505: unreferenced local function has been removed
# The CUDA `host_runtime.h` header emits this for
# `__cudaUnregisterBinaryUtil`.
Expand Down
90 changes: 47 additions & 43 deletions cub/cub/device/device_histogram.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,11 @@ _CCCL_IMPLICIT_SYSTEM_HEADER
#include <iterator>
#include <limits>

#include <cub/detail/cpp_compatibility.cuh>
#include <cub/device/dispatch/dispatch_histogram.cuh>
#include <cub/util_deprecated.cuh>


CUB_NAMESPACE_BEGIN


Expand Down Expand Up @@ -819,29 +821,30 @@ struct DeviceHistogram
using SampleT = cub::detail::value_t<SampleIteratorT>;
Int2Type<sizeof(SampleT) == 1> is_byte_sample;

if ((sizeof(OffsetT) > sizeof(int)) &&
((unsigned long long)(num_rows * row_stride_bytes) <
(unsigned long long)INT_MAX))
CUB_IF_CONSTEXPR(sizeof(OffsetT) > sizeof(int))
{
// Down-convert OffsetT data type
return DispatchHistogram<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
SampleIteratorT,
CounterT,
LevelT,
int>::DispatchEven(d_temp_storage,
temp_storage_bytes,
d_samples,
d_histogram,
num_levels,
lower_level,
upper_level,
(int)num_row_pixels,
(int)num_rows,
(int)(row_stride_bytes /
sizeof(SampleT)),
stream,
is_byte_sample);
if ((unsigned long long)(num_rows * row_stride_bytes) < (unsigned long long)INT_MAX)
{
// Down-convert OffsetT data type
return DispatchHistogram<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
SampleIteratorT,
CounterT,
LevelT,
int>::DispatchEven(d_temp_storage,
temp_storage_bytes,
d_samples,
d_histogram,
num_levels,
lower_level,
upper_level,
(int)num_row_pixels,
(int)num_rows,
(int)(row_stride_bytes /
sizeof(SampleT)),
stream,
is_byte_sample);
}
}

return DispatchHistogram<NUM_CHANNELS,
Expand Down Expand Up @@ -1594,28 +1597,29 @@ struct DeviceHistogram
using SampleT = cub::detail::value_t<SampleIteratorT>;
Int2Type<sizeof(SampleT) == 1> is_byte_sample;

if ((sizeof(OffsetT) > sizeof(int)) &&
((unsigned long long)(num_rows * row_stride_bytes) <
(unsigned long long)INT_MAX))
CUB_IF_CONSTEXPR(sizeof(OffsetT) > sizeof(int))
{
// Down-convert OffsetT data type
return DispatchHistogram<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
SampleIteratorT,
CounterT,
LevelT,
int>::DispatchRange(d_temp_storage,
temp_storage_bytes,
d_samples,
d_histogram,
num_levels,
d_levels,
(int)num_row_pixels,
(int)num_rows,
(int)(row_stride_bytes /
sizeof(SampleT)),
stream,
is_byte_sample);
if ((unsigned long long)(num_rows * row_stride_bytes) < (unsigned long long)INT_MAX)
{
// Down-convert OffsetT data type
return DispatchHistogram<NUM_CHANNELS,
NUM_ACTIVE_CHANNELS,
SampleIteratorT,
CounterT,
LevelT,
int>::DispatchRange(d_temp_storage,
temp_storage_bytes,
d_samples,
d_histogram,
num_levels,
d_levels,
(int)num_row_pixels,
(int)num_rows,
(int)(row_stride_bytes /
sizeof(SampleT)),
stream,
is_byte_sample);
}
}

return DispatchHistogram<NUM_CHANNELS,
Expand Down
7 changes: 4 additions & 3 deletions cub/cub/util_ptx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,9 @@
_CCCL_IMPLICIT_SYSTEM_HEADER
#endif // !_CCCL_COMPILER_NVHPC

#include "util_debug.cuh"
#include "util_type.cuh"
#include <cub/detail/cpp_compatibility.cuh>
#include <cub/util_debug.cuh>
#include <cub/util_type.cuh>

CUB_NAMESPACE_BEGIN

Expand Down Expand Up @@ -464,7 +465,7 @@ unsigned int WarpMask(unsigned int warp_id)
unsigned int member_mask = 0xFFFFFFFFu >>
(CUB_WARP_THREADS(0) - LOGICAL_WARP_THREADS);

if (is_pow_of_two && !is_arch_warp)
CUB_IF_CONSTEXPR(is_pow_of_two && !is_arch_warp)
{
member_mask <<= warp_id * LOGICAL_WARP_THREADS;
}
Expand Down
11 changes: 8 additions & 3 deletions cub/test/catch2_test_block_radix_sort.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#pragma once

#include <cub/block/block_radix_sort.cuh>
#include <cub/detail/cpp_compatibility.cuh>

#include <thrust/gather.h>
#include <thrust/host_vector.h>
Expand Down Expand Up @@ -368,17 +369,21 @@ get_striped_keys(const thrust::host_vector<KeyT> &h_keys,

using traits_t = cub::Traits<KeyT>;
using bit_ordered_t = typename traits_t::UnsignedBits;
const bit_ordered_t negative_zero = bit_ordered_t{1} << bit_ordered_t{sizeof(bit_ordered_t) * 8 - 1};

const int num_bits = end_bit - begin_bit;

for (std::size_t i = 0; i < h_keys.size(); i++)
{
bit_ordered_t key = reinterpret_cast<const bit_ordered_t&>(h_keys[i]);

if (traits_t::CATEGORY == cub::FLOATING_POINT && key == negative_zero)
CUB_IF_CONSTEXPR(traits_t::CATEGORY == cub::FLOATING_POINT)
{
key = 0;
const bit_ordered_t negative_zero = bit_ordered_t(1) << bit_ordered_t(sizeof(bit_ordered_t) * 8 - 1);

if (key == negative_zero)
{
key = 0;
}
}

key = traits_t::TwiddleIn(key);
Expand Down
4 changes: 2 additions & 2 deletions cub/test/catch2_test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>

#include "cub/util_compiler.cuh"
#include <cub/util_compiler.cuh>
#include "test_util_vec.h"

#include "catch2_main.cuh"
#include "test_warning_suppression.cuh"

#ifndef VAR_IDX
#define VAR_IDX 0
Expand Down Expand Up @@ -142,4 +143,3 @@ namespace detail
random(std::numeric_limits<unsigned long long int>::min(), \
std::numeric_limits<unsigned long long int>::max()))) \
}

2 changes: 1 addition & 1 deletion cub/test/catch2_test_warp_load.cu
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ thrust::device_vector<T> generate_input()

thrust::device_vector<T> d_input(num_items);

if (LoadAlgorithm == cub::WarpLoadAlgorithm::WARP_LOAD_STRIPED)
CUB_IF_CONSTEXPR(LoadAlgorithm == cub::WarpLoadAlgorithm::WARP_LOAD_STRIPED)
{
thrust::host_vector<T> h_input(num_items);

Expand Down
14 changes: 8 additions & 6 deletions cub/test/catch2_test_warp_scan.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
*
******************************************************************************/

#include <cub/detail/cpp_compatibility.cuh>
#include <cub/util_macro.cuh>
#include <cub/warp/warp_scan.cuh>


#include <thrust/device_vector.h>
#include <thrust/host_vector.h>

Expand Down Expand Up @@ -119,7 +121,7 @@ struct sum_op_t
template <class WarpScanT, class T>
__device__ void operator()(WarpScanT &scan, T &thread_data) const
{
if (Mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(Mode == scan_mode::exclusive)
{
scan.ExclusiveSum(thread_data, thread_data);
}
Expand All @@ -141,7 +143,7 @@ struct sum_aggregate_op_t
{
T warp_aggregate{};

if (Mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(Mode == scan_mode::exclusive)
{
scan.ExclusiveSum(thread_data, thread_data, warp_aggregate);
}
Expand All @@ -165,7 +167,7 @@ struct min_op_t
template <class T, class WarpScanT>
__device__ void operator()(WarpScanT &scan, T &thread_data) const
{
if (Mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(Mode == scan_mode::exclusive)
{
scan.ExclusiveScan(thread_data, thread_data, cub::Min{});
}
Expand All @@ -187,7 +189,7 @@ struct min_aggregate_op_t
{
T warp_aggregate{};

if (Mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(Mode == scan_mode::exclusive)
{
scan.ExclusiveScan(thread_data, thread_data, cub::Min{}, warp_aggregate);
}
Expand Down Expand Up @@ -466,7 +468,7 @@ CUB_TEST("Warp scan works with custom scan op", "[scan][warp]", types, logical_w
// When comparing device output, the corresponding undefined data points need
// to be fixed

if (params::mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(params::mode == scan_mode::exclusive)
{
for (size_t i = 0; i < h_out.size(); i += params::logical_warp_threads)
{
Expand Down Expand Up @@ -515,7 +517,7 @@ CUB_TEST("Warp custom op scan returns valid warp aggregate",
// When comparing device output, the corresponding undefined data points need
// to be fixed

if (params::mode == scan_mode::exclusive)
CUB_IF_CONSTEXPR(params::mode == scan_mode::exclusive)
{
for (size_t i = 0; i < h_out.size(); i += params::logical_warp_threads)
{
Expand Down
6 changes: 5 additions & 1 deletion cub/test/test_device_batch_copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,11 @@
*
******************************************************************************/

#include <cub/detail/cpp_compatibility.cuh>
#include <cub/device/device_copy.cuh>
#include <cub/util_ptx.cuh>


#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/iterator/constant_iterator.h>
Expand Down Expand Up @@ -121,8 +123,10 @@ template <size_t n, typename... T>
typename std::enable_if<n + 1 <= thrust::tuple_size<thrust::tuple<T...>>::value>::type
print_tuple(std::ostream &os, const thrust::tuple<T...> &tup)
{
if (n != 0)
CUB_IF_CONSTEXPR(n != 0)
{
os << ", ";
}
os << thrust::get<n>(tup);
print_tuple<n + 1>(os, tup);
}
Expand Down
51 changes: 28 additions & 23 deletions cub/test/test_device_radix_sort.cu
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include <cuda_bf16.h>
#endif

#include <cub/detail/cpp_compatibility.cuh>
#include <cub/detail/device_synchronize.cuh>
#include <cub/device/device_radix_sort.cuh>
#include <cub/device/device_segmented_radix_sort.cuh>
Expand Down Expand Up @@ -790,10 +791,13 @@ void InitializeKeysSorted(
// Fill the array.
UnsignedBits key = TraitsT::TwiddleOut(twiddled_key);
// Avoid -0.0 for floating-point keys.
UnsignedBits negative_zero = UnsignedBits(1) << UnsignedBits(sizeof(UnsignedBits) * 8 - 1);
if (TraitsT::CATEGORY == cub::FLOATING_POINT && key == negative_zero)
CUB_IF_CONSTEXPR(TraitsT::CATEGORY == cub::FLOATING_POINT)
{
key = 0;
UnsignedBits negative_zero = UnsignedBits(1) << UnsignedBits(sizeof(UnsignedBits) * 8 - 1);
if (key == negative_zero)
{
key = 0;
}
}

for (; i < run_end; ++i)
Expand Down Expand Up @@ -1179,7 +1183,7 @@ void Test(

// If in/out API is used, we are not allowed to overwrite the input.
// Let's check that the input buffer is not overwritten by the algorithm.
if (BACKEND == CUB_NO_OVERWRITE)
CUB_IF_CONSTEXPR(BACKEND == CUB_NO_OVERWRITE)
{
KeyT *d_input_keys = reinterpret_cast<KeyT*>(d_keys.d_buffers[0]);

Expand Down Expand Up @@ -1522,25 +1526,26 @@ void TestBits(
EndOffsetIteratorT d_segment_end_offsets)
{
// Don't test partial-word sorting for boolean, fp, or signed types (the bit-flipping techniques get in the way) or pre-sorted keys
if ((Traits<KeyT>::CATEGORY == UNSIGNED_INTEGER)
&& (!std::is_same<KeyT, bool>::value)
&& !pre_sorted)
CUB_IF_CONSTEXPR((cub::Traits<KeyT>::CATEGORY == cub::UNSIGNED_INTEGER) && (!std::is_same<KeyT, bool>::value))
{
// Partial bits
int begin_bit = 1;
int end_bit = (sizeof(KeyT) * 8) - 1;
printf("Testing key bits [%d,%d)\n", begin_bit, end_bit); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, begin_bit, end_bit);

// Equal bits
begin_bit = end_bit = 0;
printf("Testing key bits [%d,%d)\n", begin_bit, end_bit); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, begin_bit, end_bit);

// Across subword boundaries
int mid_bit = sizeof(KeyT) * 4;
printf("Testing key bits [%d,%d)\n", mid_bit - 1, mid_bit + 1); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, mid_bit - 1, mid_bit + 1);
if (!pre_sorted)
{
// Partial bits
int begin_bit = 1;
int end_bit = (sizeof(KeyT) * 8) - 1;
printf("Testing key bits [%d,%d)\n", begin_bit, end_bit); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, begin_bit, end_bit);

// Equal bits
begin_bit = end_bit = 0;
printf("Testing key bits [%d,%d)\n", begin_bit, end_bit); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, begin_bit, end_bit);

// Across subword boundaries
int mid_bit = sizeof(KeyT) * 4;
printf("Testing key bits [%d,%d)\n", mid_bit - 1, mid_bit + 1); fflush(stdout);
TestDirection(h_keys, num_items, num_segments, pre_sorted, h_segment_offsets, d_segment_begin_offsets, d_segment_end_offsets, mid_bit - 1, mid_bit + 1);
}
}

printf("Testing key bits [%d,%d)\n", 0, int(sizeof(KeyT)) * 8); fflush(stdout);
Expand Down Expand Up @@ -1734,7 +1739,7 @@ void TestGen(
TestSizes(h_keys.get(), max_items, max_segments, false);
}

if (cub::Traits<KeyT>::CATEGORY == cub::FLOATING_POINT)
CUB_IF_CONSTEXPR(cub::Traits<KeyT>::CATEGORY == cub::FLOATING_POINT)
{
printf("\nTesting random %s keys with some replaced with -0.0 or +0.0 \n", typeid(KeyT).name());
fflush(stdout);
Expand Down
1 change: 1 addition & 0 deletions cub/test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
#include "c2h/extended_types.cuh"
#include "mersenne.h"
#include "test_util_vec.h"
#include "test_warning_suppression.cuh"
#include <nv/target>

/******************************************************************************
Expand Down
Loading

0 comments on commit 08fcae6

Please sign in to comment.