-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#5641: Fix HIP & CUDA MDRange reduce for sizeof(value_type) < sizeof(int) (#5745)
* Fix comment typo noticed in early analysis
* Add test case
* #5641: HIP: Fix MDRange parallel_reduce over values smaller than int
* #5641 Cuda: Fix MDRange parallel_reduce over values smaller than int
* Try to appease icpc's idiocy
* Skip the test for OpenMPTarget backend, since it's broken
* Sample bound values to test, rather than sweeping
* Shrink largest bound value to avoid timeout
* Report skipped in disabled CUDA extended lambda case
* Fix skipping condition
- Loading branch information
1 parent
9786d57
commit ee75763
Showing
5 changed files
with
148 additions
and
35 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
//@HEADER | ||
// ************************************************************************ | ||
// | ||
// Kokkos v. 4.0 | ||
// Copyright (2022) National Technology & Engineering | ||
// Solutions of Sandia, LLC (NTESS). | ||
// | ||
// Under the terms of Contract DE-NA0003525 with NTESS, | ||
// the U.S. Government retains certain rights in this software. | ||
// | ||
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://kokkos.org/LICENSE for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//@HEADER | ||
|
||
#include <gtest/gtest.h> | ||
|
||
#include <Kokkos_Core.hpp> | ||
|
||
namespace { | ||
|
||
// Runs two logical reductions with value type T over a rank-2 | ||
// MDRangePolicy on TEST_EXECSPACE whose bounds are {0, 0} to {bound, 2}, | ||
// and checks each result with EXPECT_EQ: | ||
//   1. Kokkos::LOr, where only iterations whose first index equals k | ||
//      contribute a true value. | ||
//   2. Kokkos::LAnd, where every iteration writes 1. | ||
// Both results are expected to equal 1. | ||
// | ||
// bound: upper bound of the first MDRange dimension. | ||
// k:     first-dimension index whose iterations feed true into the LOr. | ||
// | ||
// NOTE(review): bound is used below, so [[maybe_unused]] presumably only | ||
// silences a spurious compiler warning -- confirm before removing it. | ||
template <typename T> | ||
void MDRangeReduceTester([[maybe_unused]] int bound, int k) { | ||
const auto policy_MD = Kokkos::MDRangePolicy<Kokkos::Rank<2>, TEST_EXECSPACE>( | ||
{0, 0}, {bound, 2}); | ||
|
||
// No explicit fence() calls needed because result is in HostSpace | ||
{ | ||
// Stick just a few true values in the Logical-OR reduction space, | ||
// to try to make sure every value is being captured: only iterations | ||
// with i == k contribute true. lor_MD starts at 0, so the expected 1 | ||
// has to come from the reduction itself. | ||
T lor_MD = 0; | ||
Kokkos::parallel_reduce( | ||
policy_MD, | ||
KOKKOS_LAMBDA(const int i, const int, T& res) { res = res || i == k; }, | ||
Kokkos::LOr<T>(lor_MD)); | ||
EXPECT_EQ(lor_MD, 1); | ||
} | ||
{ | ||
// Logical-AND reduction in which every iteration sets its contribution | ||
// to 1. land_MD starts at 0, so the expected 1 has to be written by the | ||
// reduction. | ||
T land_MD = 0; | ||
Kokkos::parallel_reduce( | ||
policy_MD, KOKKOS_LAMBDA(const int, const int, T& res) { res = 1; }, | ||
Kokkos::LAnd<T>(land_MD)); | ||
EXPECT_EQ(land_MD, 1); | ||
} | ||
} | ||
|
||
// Calls MDRangeReduceTester for bool, signed char and the fixed-width | ||
// signed integer types int8_t through int64_t, over a sample of | ||
// first-dimension bounds and every k in [0, bound) for each bound. | ||
TEST(TEST_CATEGORY, mdrange_parallel_reduce_primitive_types) { | ||
#if defined(KOKKOS_ENABLE_OPENMPTARGET) | ||
// Skipped whenever the OpenMPTarget backend is enabled in the build. | ||
GTEST_SKIP() << "FIXME OPENMPTARGET Tests of MDRange reduce over values " | ||
"smaller than int would fail"; | ||
#elif defined(KOKKOS_ENABLE_CUDA) && !defined(KOKKOS_ENABLE_CUDA_LAMBDA) | ||
// Skipped when CUDA is enabled without KOKKOS_ENABLE_CUDA_LAMBDA; | ||
// presumably because the tester relies on KOKKOS_LAMBDA -- confirm. | ||
GTEST_SKIP() << "Skipped ENABLE_CUDA_LAMBDA"; | ||
#else | ||
// Sampled bounds rather than a full sweep. | ||
// NOTE(review): for bound == 0 the inner loop has no iterations, so the | ||
// tester is never invoked with an empty range -- confirm this is intended. | ||
for (int bound : {0, 1, 7, 32, 65, 7000}) { | ||
for (int k = 0; k < bound; ++k) { | ||
MDRangeReduceTester<bool>(bound, k); | ||
MDRangeReduceTester<signed char>(bound, k); | ||
MDRangeReduceTester<int8_t>(bound, k); | ||
MDRangeReduceTester<int16_t>(bound, k); | ||
MDRangeReduceTester<int32_t>(bound, k); | ||
MDRangeReduceTester<int64_t>(bound, k); | ||
} | ||
} | ||
#endif | ||
} | ||
|
||
} // namespace |