From 3dfb1a9f97c149aea43cd6206626ad188a0b1e06 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 21 Dec 2023 00:58:05 +0100 Subject: [PATCH] Add a test for independent_groups and independent_group_elements --- .../AlpakaInterface/test/BuildFile.xml | 7 + .../test/alpaka/testIndependentKernel.dev.cc | 144 ++++++++++++++++++ 2 files changed, 151 insertions(+) create mode 100644 HeterogeneousCore/AlpakaInterface/test/alpaka/testIndependentKernel.dev.cc diff --git a/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml b/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml index 5f9c5fe81981f..2d204819d740b 100644 --- a/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml +++ b/HeterogeneousCore/AlpakaInterface/test/BuildFile.xml @@ -12,6 +12,13 @@ + + + + + + + diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testIndependentKernel.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testIndependentKernel.dev.cc new file mode 100644 index 0000000000000..bd98efcfa32d6 --- /dev/null +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testIndependentKernel.dev.cc @@ -0,0 +1,144 @@ +#include +#include + +#include + +#define CATCH_CONFIG_MAIN +#include + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +// each test binary is built for a single Alpaka backend +using namespace ALPAKA_ACCELERATOR_NAMESPACE; + +/* Add the group id to the value of each element in the group. + * Each group is composed of the elements indices[group]..indices[group+1]-1 . 
+ */ +struct IndependentWorkKernel { + template /* NOTE(review): angle-bracket content (template parameter lists and template arguments) appears to be missing throughout this copy of the patch; confirm against the original commit */ + ALPAKA_FN_ACC void operator()(TAcc const& acc, + T const* __restrict__ in, + T* __restrict__ out, + size_t const* __restrict__ indices, + size_t groups) const { + for (auto group : cms::alpakatools::independent_groups(acc, groups)) { /* NOTE(review): how groups and elements are mapped onto blocks and threads is decided by the workdivision.h helpers, not visible here */ + size_t first = indices[group]; /* offset of the first element of this group */ + size_t last = indices[group + 1]; /* offset one past the last element of this group */ + size_t size = last - first; + for (auto index : cms::alpakatools::independent_group_elements(acc, size)) { + out[first + index] = in[first + index] + group; /* output = input shifted by the group id */ + } + } + } +}; + +/* Test the IndependentWorkKernel kernel on all devices: build `groups` groups of random size, run the kernel with the given grid and block sizes, and check that each output equals its input plus the group id + */ +template +void testIndependentWorkKernel(size_t groups, size_t grid_size, size_t block_size, TKernel kernel) { + // random number engine, seeded from std::random_device + std::random_device rd{}; + std::default_random_engine engine{rd()}; + + // uniform distribution for the size of each group, between 100 and 201 elements (bounds included) + std::uniform_int_distribution random_size{100, 201}; + + // gaussian distribution (mean 0, standard deviation 1) for the input values + std::normal_distribution dist{0., 1.}; + + // build the groups: indices_h[i] is the offset of the first element of group i, indices_h[groups] is the total number of elements + std::vector sizes(groups); + auto indices_h = cms::alpakatools::make_host_buffer(groups + 1); + indices_h[0] = 0; + for (size_t i = 0; i < groups; ++i) { + auto size = random_size(engine); + sizes[i] = size; /* NOTE(review): sizes is only written in the code visible here, never read */ + indices_h[i + 1] = indices_h[i] + size; + } + + // tolerance: relative to the expected value, with epsilon itself as the minimum absolute tolerance (see the check below) + constexpr float epsilon = 0.000001; + + // buffer size: total number of elements over all groups + const size_t size = indices_h[groups]; + + // allocate the input and output host buffer in pinned memory accessible by the Platform devices + auto in_h = cms::alpakatools::make_host_buffer(size); + auto out_h = cms::alpakatools::make_host_buffer(size); + + // fill the input buffers with random data, and the output buffer with zeros + for (size_t i = 0; i < size; ++i) { + in_h[i] = dist(engine); + out_h[i] = 0; + } + + // run the test on each device + for (auto const& device : cms::alpakatools::devices()) { + std::cout << "Test IndependentWorkKernel on " << alpaka::getName(device) << " over " << size << " elements in " + << groups << " independent 
groups with " << grid_size << " blocks of " << block_size << " elements\n"; + auto queue = Queue(device); + + // allocate input and output buffers on the device + auto indices_d = cms::alpakatools::make_device_buffer(queue, groups + 1); + auto in_d = cms::alpakatools::make_device_buffer(queue, size); + auto out_d = cms::alpakatools::make_device_buffer(queue, size); + + // copy the input data to the device; the size is known from the buffer objects + alpaka::memcpy(queue, indices_d, indices_h); + alpaka::memcpy(queue, in_d, in_h); + + // fill the output buffer with zeros; the size is known from the buffer objects + alpaka::memset(queue, out_d, 0.); + + // launch the 1-dimensional kernel with independent work groups + auto div = cms::alpakatools::make_workdiv(grid_size, block_size); + alpaka::exec(queue, div, kernel, in_d.data(), out_d.data(), indices_d.data(), groups); + + // copy the results from the device to the host + alpaka::memcpy(queue, out_h, out_d); + + // wait for all the operations to complete + alpaka::wait(queue); + + // check the results: every element of group g must equal its input plus g, within the tolerance + for (size_t g = 0; g < groups; ++g) { + size_t first = indices_h[g]; + size_t last = indices_h[g + 1]; + for (size_t i = first; i < last; ++i) { + float sum = in_h[i] + g; + float delta = std::max(std::fabs(sum) * epsilon, epsilon); /* never smaller than epsilon, so values near zero are still comparable */ + REQUIRE(out_h[i] < sum + delta); + REQUIRE(out_h[i] > sum - delta); + } + } + } +} + +TEST_CASE("Test alpaka kernels for the " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) " backend", + "[" EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE) "]") { + SECTION("Independent work groups") { + // get the list of devices on the current platform; the test fails if there are none + auto const& devices = cms::alpakatools::devices(); + if (devices.empty()) { + INFO("No devices available on the platform " EDM_STRINGIZE(ALPAKA_ACCELERATOR_NAMESPACE)); + REQUIRE(not devices.empty()); + } + + // launch the independent work kernel with a small block size and a small number of blocks; + // this relies on the kernel to loop over the "problem 
space" and do more work per block + std::cout << "Test independent work kernel with small block size, using scalar dimensions\n"; + testIndependentWorkKernel(100, 32, 32, IndependentWorkKernel{}); /* 100 groups, 32 blocks of 32 elements */ + + // launch the independent work kernel with a large block size and a single block; + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data + std::cout << "Test independent work kernel with large block size, using scalar dimensions\n"; + testIndependentWorkKernel(100, 1, 1024, IndependentWorkKernel{}); /* 100 groups, 1 block of 1024 elements */ + + // launch the independent work kernel with a large block size and a large number of blocks; + // this relies on the kernel to check the size of the "problem space" and avoid accessing out-of-bounds data + std::cout << "Test independent work kernel with large block size, using scalar dimensions\n"; /* NOTE(review): same message as the previous case, although this one also uses a large number of blocks */ + testIndependentWorkKernel(100, 1024, 1024, IndependentWorkKernel{}); /* 100 groups, 1024 blocks of 1024 elements */ + } +}