Skip to content

Commit

Permalink
Port Custom Reduction tests
Browse files: browse the repository at this point in the history
  • Loading branch information
cz4rs committed Feb 6, 2023
1 parent 5635e13 commit 25876cf
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 44 deletions.
5 changes: 3 additions & 2 deletions core/perf_test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ IF(KOKKOS_ENABLE_TESTS)
SET(SOURCES
PerfTestMain.cpp
PerfTestGramSchmidt.cpp
PerfTest_CustomReduction.cpp
PerfTest_ExecSpacePartitioning.cpp
)
)


IF(Kokkos_ENABLE_OPENMPTARGET)
# FIXME OPENMPTARGET requires TeamPolicy Reductions and Custom Reduction
Expand Down Expand Up @@ -176,6 +176,7 @@ SET(
BENCHMARK_SOURCES
BenchmarkMain.cpp
Benchmark_Context.cpp
PerfTest_CustomReduction.cpp
PerfTestHexGrad.cpp
PerfTest_ViewAllocate.cpp
PerfTest_ViewCopy_a123.cpp
Expand Down
107 changes: 65 additions & 42 deletions core/perf_test/PerfTest_CustomReduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,16 @@
//@HEADER

#include <Kokkos_Core.hpp>
#include <gtest/gtest.h>
#include <benchmark/benchmark.h>
#include "Benchmark_Context.hpp"
#include <PerfTest_Category.hpp>
#include <Kokkos_Random.hpp>
#include <utility>

#ifdef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
namespace Test {
template <class Scalar>
void custom_reduction_test(int N, int R, int num_trials) {
std::pair<double, Scalar> custom_reduction_test(int N, int R) {
Kokkos::Random_XorShift64_Pool<> rand_pool(183291);
Kokkos::View<Scalar*> a("A", N);
Kokkos::fill_random(a, rand_pool, 1.0);
Expand Down Expand Up @@ -62,49 +64,70 @@ void custom_reduction_test(int N, int R, int num_trials) {

// Timing
Kokkos::Timer timer;
for (int r = 0; r < num_trials; r++) {
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<>(N / 1024, team_size),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team,
Scalar& lmax) {
Scalar team_max = Scalar(0);
for (int rr = 0; rr < R; rr++) {
int i = team.league_rank();
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, 32),
[&](const int& j, Scalar& thread_max) {
Scalar t_max = Scalar(0);
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, 32),
[&](const int& k, Scalar& max_) {
const Scalar val = a((i * 32 + j) * 32 + k);
if (val > max_) max_ = val;
if ((k == 11) && (j == 17) && (i == 2)) max_ = 11.5;
},
Kokkos::Max<Scalar>(t_max));
if (t_max > thread_max) thread_max = t_max;
},
Kokkos::Max<Scalar>(team_max));
}
if (team_max > lmax) lmax = team_max;
},
Kokkos::Max<Scalar>(max));
}
double time = timer.seconds();
printf("%e %e %e\n", time,
1.0 * N * R * num_trials * sizeof(Scalar) / time / 1024 / 1024 / 1024,
max);
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<>(N / 1024, team_size),
KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team,
Scalar& lmax) {
Scalar team_max = Scalar(0);
for (int rr = 0; rr < R; rr++) {
int i = team.league_rank();
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, 32),
[&](const int& j, Scalar& thread_max) {
Scalar t_max = Scalar(0);
Kokkos::parallel_reduce(
Kokkos::ThreadVectorRange(team, 32),
[&](const int& k, Scalar& max_) {
const Scalar val = a((i * 32 + j) * 32 + k);
if (val > max_) max_ = val;
if ((k == 11) && (j == 17) && (i == 2)) max_ = 11.5;
},
Kokkos::Max<Scalar>(t_max));
if (t_max > thread_max) thread_max = t_max;
},
Kokkos::Max<Scalar>(team_max));
}
if (team_max > lmax) lmax = team_max;
},
Kokkos::Max<Scalar>(max));

return std::make_pair(timer.seconds(), max);
}

TEST(default_exec, custom_reduction) {
int N = 100000;
int R = 1000;
int num_trials = 1;
// Selects the N value for a benchmark run. A first positional command-line
// argument, when supplied, overrides the benchmark's own range(0) argument.
int get_N(benchmark::State& state) {
  if (Test::command_line_num_args() > 1) {
    return std::stoi(Test::command_line_arg(1));
  }
  return state.range(0);
}

if (command_line_num_args() > 1) N = std::stoi(command_line_arg(1));
if (command_line_num_args() > 2) R = std::stoi(command_line_arg(2));
if (command_line_num_args() > 3) num_trials = std::stoi(command_line_arg(3));
custom_reduction_test<double>(N, R, num_trials);
// Selects the R value for a benchmark run. A second positional command-line
// argument, when supplied, overrides the benchmark's own range(1) argument.
int get_R(benchmark::State& state) {
  if (Test::command_line_num_args() > 2) {
    return std::stoi(Test::command_line_arg(2));
  }
  return state.range(1);
}

// Google Benchmark driver for the custom-reduction kernel, instantiated per
// Scalar type.
//
// N and R are obtained from get_N()/get_R(). Every benchmark iteration runs
// custom_reduction_test<Scalar>(N, R) once; that call returns a pair of
// (elapsed seconds, reduction result). The elapsed seconds are reported as
// the manual iteration time, and two counters are published: the "GB/s"
// figure of merit and the reduction result under "Max".
template <class Scalar>
static void CustomReduction(benchmark::State& state) {
  const int N = get_N(state);
  const int R = get_R(state);

  // Gigabytes touched by one iteration. Evaluated in double so that N * R
  // cannot overflow int when large values are passed on the command line.
  const double gigabytes_per_iteration =
      static_cast<double>(N) * R * sizeof(Scalar) / 1'000'000'000;

  for (auto _ : state) {
    // Run the kernel with Scalar (not a hard-coded double) so the measured
    // work matches the sizeof(Scalar) used in the throughput figure.
    const auto results = custom_reduction_test<Scalar>(N, R);

    state.SetIterationTime(results.first);
    // kIsIterationInvariantRate multiplies the value by the iteration count
    // and divides by the benchmark duration, so the counter must hold the
    // per-iteration data volume rather than an already time-divided rate.
    state.counters[KokkosBenchmark::benchmark_fom("GB/s")] = benchmark::Counter(
        gigabytes_per_iteration, benchmark::Counter::kIsIterationInvariantRate);
    state.counters["Max"] = benchmark::Counter(results.second);
  }
}

BENCHMARK(CustomReduction<double>)
->ArgNames({"N", "R"})
->Args({100'000, 1'000})
->UseManualTime();

} // namespace Test
#endif

0 comments on commit 25876cf

Please sign in to comment.