From d5ef9778a3d3588cbc4f4b1c9d86348409003c3c Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 2 Feb 2023 18:34:02 +0100 Subject: [PATCH 1/3] Add support for HIP/ROCm Add a test using a SoA in ROCm device code. --- DataFormats/SoATemplate/interface/SoACommon.h | 8 +- DataFormats/SoATemplate/test/BuildFile.xml | 25 +- .../test/SoALayoutAndView_t.hip.cc | 291 ++++++++++++++++++ DataFormats/SoATemplate/test/classes.h | 2 +- 4 files changed, 316 insertions(+), 10 deletions(-) create mode 100644 DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc diff --git a/DataFormats/SoATemplate/interface/SoACommon.h b/DataFormats/SoATemplate/interface/SoACommon.h index 220ea73176d71..7949a3c4c69fe 100644 --- a/DataFormats/SoATemplate/interface/SoACommon.h +++ b/DataFormats/SoATemplate/interface/SoACommon.h @@ -16,7 +16,7 @@ #include "FWCore/Utilities/interface/typedefs.h" // CUDA attributes -#ifdef __CUDACC__ +#if defined(__CUDACC__) || defined(__HIPCC__) #define SOA_HOST_ONLY __host__ #define SOA_DEVICE_ONLY __device__ #define SOA_HOST_DEVICE __host__ __device__ @@ -35,6 +35,12 @@ printf("%s\n", (A)); \ __trap(); \ } +#elif defined(__HIPCC__) && defined(__HIP_DEVICE_COMPILE__) +#define SOA_THROW_OUT_OF_RANGE(A) \ + { \ + printf("%s\n", (A)); \ + abort(); \ + } #else #define SOA_THROW_OUT_OF_RANGE(A) \ { throw std::out_of_range(A); } diff --git a/DataFormats/SoATemplate/test/BuildFile.xml b/DataFormats/SoATemplate/test/BuildFile.xml index 7c62e15bde402..4c3fedb10e13c 100644 --- a/DataFormats/SoATemplate/test/BuildFile.xml +++ b/DataFormats/SoATemplate/test/BuildFile.xml @@ -1,17 +1,26 @@ - + + - - - - + + + + + diff --git a/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc b/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc new file mode 100644 index 0000000000000..3fd5dc72969c2 --- /dev/null +++ b/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc @@ -0,0 +1,291 @@ +#include +#include +#include + +#include + +#include +#include + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/SoATemplate/interface/SoAView.h" +#include "HeterogeneousCore/ROCmUtilities/interface/hipCheck.h" +#include "HeterogeneousCore/ROCmUtilities/interface/requireDevices.h" + +// Test SoA stores and view. +// Use cases +// Multiple stores in a buffer +// Scalars, Columns of scalars and of Eigen vectors +// View to each of them, from one and multiple stores. + +GENERATE_SOA_LAYOUT(SoAHostDeviceLayoutTemplate, + /*SoAHostDeviceViewTemplate,*/ + // predefined static scalars + // size_t size; + // size_t alignment; + + // columns: one value per element + SOA_COLUMN(double, x), + SOA_COLUMN(double, y), + SOA_COLUMN(double, z), + SOA_EIGEN_COLUMN(Eigen::Vector3d, a), + SOA_EIGEN_COLUMN(Eigen::Vector3d, b), + SOA_EIGEN_COLUMN(Eigen::Vector3d, r), + // scalars: one value for the whole structure + SOA_SCALAR(const char*, description), + SOA_SCALAR(uint32_t, someNumber)) + +using SoAHostDeviceLayout = SoAHostDeviceLayoutTemplate<>; +using SoAHostDeviceView = SoAHostDeviceLayout::View; +using SoAHostDeviceConstView = SoAHostDeviceLayout::ConstView; + +GENERATE_SOA_LAYOUT(SoADeviceOnlyLayoutTemplate, + /*SoADeviceOnlyViewTemplate,*/ + SOA_COLUMN(uint16_t, color), + SOA_COLUMN(double, value), + SOA_COLUMN(double*, py), + SOA_COLUMN(uint32_t, count), + SOA_COLUMN(uint32_t, anotherCount)) + +using SoADeviceOnlyLayout = SoADeviceOnlyLayoutTemplate<>; +using SoADeviceOnlyView = SoADeviceOnlyLayout::View; + +// A 1 to 1 view of the store (except for unsupported types). +GENERATE_SOA_VIEW(SoAFullDeviceConstViewTemplate, + SoAFullDeviceViewTemplate, + SOA_VIEW_LAYOUT_LIST(SOA_VIEW_LAYOUT(SoAHostDeviceLayout, soaHD), + SOA_VIEW_LAYOUT(SoADeviceOnlyLayout, soaDO)), + SOA_VIEW_VALUE_LIST(SOA_VIEW_VALUE(soaHD, x), + SOA_VIEW_VALUE(soaHD, y), + SOA_VIEW_VALUE(soaHD, z), + SOA_VIEW_VALUE(soaDO, color), + SOA_VIEW_VALUE(soaDO, value), + SOA_VIEW_VALUE(soaDO, py), + SOA_VIEW_VALUE(soaDO, count), + SOA_VIEW_VALUE(soaDO, anotherCount), + SOA_VIEW_VALUE(soaHD, description), + SOA_VIEW_VALUE(soaHD, someNumber))) + +using SoAFullDeviceView = + SoAFullDeviceViewTemplate; + +// Eigen cross product kernel (on store) +__global__ void crossProduct(SoAHostDeviceView soa, const int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.r() = si.a().cross(si.b()); +} + +// Device-only producer kernel +__global__ void producerKernel(SoAFullDeviceView soa, const int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.color() &= 0x55 << i % (sizeof(si.color()) - sizeof(char)); + si.value() = sqrt(si.x() * si.x() + si.y() * si.y() + si.z() * si.z()); +} + +// Device-only consumer with result in host-device area +__global__ void consumerKernel(SoAFullDeviceView soa, const int numElements) { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i >= numElements) + return; + auto si = soa[i]; + si.x() = si.color() * si.value(); +} + +// Get a view like the default, except for range checking +using RangeCheckingHostDeviceView = + SoAHostDeviceLayout::ViewTemplate; + +// We expect to just run one thread. +__global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) { + printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int) threadIdx.x); + [[maybe_unused]] auto si = soa[soa.metadata().size()]; + printf("Fail: range-check failure should have stopped the kernel.\n"); +} + +int main(void) { + cms::rocmtest::requireDevices(); + + hipStream_t stream; + hipCheck(hipStreamCreateWithFlags(&stream, hipStreamNonBlocking)); + + // Non-aligned number of elements to check alignment features. + constexpr unsigned int numElements = 65537; + + // Allocate buffer and store on host + size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements); + std::byte* h_buf = nullptr; + hipCheck(hipMallocHost((void**) &h_buf, hostDeviceSize)); + SoAHostDeviceLayout h_soahdLayout(h_buf, numElements); + SoAHostDeviceView h_soahd(h_soahdLayout); + SoAHostDeviceConstView h_soahd_c(h_soahdLayout); + + // Alocate buffer, stores and views on the device (single, shared buffer). + size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements); + std::byte* d_buf = nullptr; + hipCheck(hipMallocHost((void**) &d_buf, hostDeviceSize + deviceOnlySize)); + SoAHostDeviceLayout d_soahdLayout(d_buf, numElements); + SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements); + SoAHostDeviceView d_soahdView(d_soahdLayout); + SoAFullDeviceView d_soaFullView(d_soahdLayout, d_soadoLayout); + + // Assert column alignments + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_x()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_y()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_z()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_a()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_b()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_r()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_description()) % decltype(h_soahd)::alignment); + assert(0 == reinterpret_cast(h_soahd.metadata().addressOf_someNumber()) % decltype(h_soahd)::alignment); + + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_x()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_y()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_z()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_a()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_b()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_r()) % decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_description()) % + decltype(d_soahdLayout)::alignment); + assert(0 == reinterpret_cast(d_soahdLayout.metadata().addressOf_someNumber()) % + decltype(d_soahdLayout)::alignment); + + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_color()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_value()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_py()) % decltype(d_soadoLayout)::alignment); + assert(0 == + reinterpret_cast(d_soadoLayout.metadata().addressOf_count()) % decltype(d_soadoLayout)::alignment); + assert(0 == reinterpret_cast(d_soadoLayout.metadata().addressOf_anotherCount()) % + decltype(d_soadoLayout)::alignment); + + // Views should get the same alignment as the stores they refer to + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_x()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_y()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_z()) % decltype(d_soaFullView)::alignment); + // Limitation of views: we have to get scalar member addresses via metadata. + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_description()) % + decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_someNumber()) % + decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_color()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_value()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_py()) % decltype(d_soaFullView)::alignment); + assert(0 == + reinterpret_cast(d_soaFullView.metadata().addressOf_count()) % decltype(d_soaFullView)::alignment); + assert(0 == reinterpret_cast(d_soaFullView.metadata().addressOf_anotherCount()) % + decltype(d_soaFullView)::alignment); + + // Initialize and fill the host buffer + std::memset(h_soahdLayout.metadata().data(), 0, hostDeviceSize); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd[i]; + // Tuple assignment... + // elements are: x, y, z, a, b, r + auto v1 = 1.0 * i + 1.0; + auto v2 = 2.0 * i; + auto v3 = 3.0 * i - 1.0; + if (i % 2) { + si = {v1, v2, v3, {v1, v2, v3}, {v3, v2, v1}, {0, 0, 0}}; + } else { + si.x() = si.a()(0) = si.b()(2) = v1; + si.y() = si.a()(1) = si.b()(1) = v2; + si.z() = si.a()(2) = si.b()(0) = v3; + } + } + auto& sn = h_soahd.someNumber(); + sn = numElements + 2; + + // Push to device + hipCheck(hipMemcpyAsync(d_buf, h_buf, hostDeviceSize, hipMemcpyDefault, stream)); + + // Process on device + crossProduct<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soahdView, numElements); + + // Paint the device only with 0xFF initially + hipCheck(hipMemsetAsync(d_soadoLayout.metadata().data(), 0xFF, d_soadoLayout.metadata().byteSize(), stream)); + + // Produce to the device only area + producerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Consume the device only area and generate a result on the host-device area + consumerKernel<<<(numElements + 255) / 256, 256, 0, stream>>>(d_soaFullView, numElements); + + // Get result back + hipCheck(hipMemcpyAsync(h_buf, d_buf, hostDeviceSize, hipMemcpyDefault, stream)); + + // Wait and validate. + hipCheck(hipStreamSynchronize(stream)); + for (size_t i = 0; i < numElements; ++i) { + auto si = h_soahd_c[i]; + assert(si.r() == si.a().cross(si.b())); + double initialX = 1.0 * i + 1.0; + double initialY = 2.0 * i; + double initialZ = 3.0 * i - 1.0; + uint16_t expectedColor = 0x55 << i % (sizeof(uint16_t) - sizeof(char)); + double expectedX = expectedColor * sqrt(initialX * initialX + initialY * initialY + initialZ * initialZ); + if (abs(si.x() - expectedX) / expectedX >= 2 * std::numeric_limits::epsilon()) { + std::cout << "X failed: for i=" << i << std::endl + << "initialX=" << initialX << " initialY=" << initialY << " initialZ=" << initialZ << std::endl + << "expectedX=" << expectedX << std::endl + << "resultX=" << si.x() << " resultY=" << si.y() << " resultZ=" << si.z() << std::endl + << "relativeDiff=" << abs(si.x() - expectedX) / expectedX + << " epsilon=" << std::numeric_limits::epsilon() << std::endl; + assert(false); + } + } + + // Validation of range checking + try { + // Get a view like the default, except for range checking + SoAHostDeviceLayout::ViewTemplate + soa1viewRangeChecking(h_soahdLayout); + // This should throw an exception + [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()]; + std::cout << "Fail: expected range-check exception (operator[]) not caught on the host." << std::endl; + assert(false); + } catch (const std::out_of_range&) { + std::cout << "Pass: expected range-check exception (operator[]) successfully caught on the host." << std::endl; + } + + try { + // Get a view like the default, except for range checking + SoAHostDeviceLayout::ViewTemplate + soa1viewRangeChecking(h_soahdLayout); + // This should throw an exception + [[maybe_unused]] auto si = soa1viewRangeChecking[soa1viewRangeChecking.metadata().size()]; + std::cout << "Fail: expected range-check exception (view-level index access) not caught on the host." << std::endl; + assert(false); + } catch (const std::out_of_range&) { + std::cout << "Pass: expected range-check exception (view-level index access) successfully caught on the host." + << std::endl; + } + + // Validation of range checking in a kernel + // Get a view like the default one, except for range checking + RangeCheckingHostDeviceView soa1viewRangeChecking(d_soahdLayout); + + // This should throw an exception in the kernel + rangeCheckKernel<<<1, 1, 0, stream>>>(soa1viewRangeChecking); + + // Wait and confirm that the ROCm kernel failed + try { + hipCheck(hipStreamSynchronize(stream)); + std::cout << "Fail: expected range-check exception not caught while executing the kernel." << std::endl; + assert(false); + } catch (const std::runtime_error&) { + std::cout << "Pass: expected range-check exception caught while executing the kernel." << std::endl; + } + + std::cout << "OK" << std::endl; +} diff --git a/DataFormats/SoATemplate/test/classes.h b/DataFormats/SoATemplate/test/classes.h index bba9150237d88..08c7f716d44b8 100644 --- a/DataFormats/SoATemplate/test/classes.h +++ b/DataFormats/SoATemplate/test/classes.h @@ -1 +1 @@ -#include "DataFormats/SoATemplate/test/FakeSoA.h" \ No newline at end of file +#include "DataFormats/SoATemplate/test/FakeSoA.h" From 2da538da99bdb87cab86917119f4457e58678b0d Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 2 Feb 2023 18:39:30 +0100 Subject: [PATCH 2/3] Add or improve support for HIP/ROCm --- .../AlpakaInterface/interface/AllocatorPolicy.h | 2 ++ .../AlpakaInterface/interface/CachedBufAlloc.h | 6 +----- .../AlpakaServices/src/alpaka/AlpakaService.cc | 16 ++++++++++++++++ 3 files changed, 19 insertions(+), 5 deletions(-) diff --git a/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h b/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h index 5324c99ab445c..099469f67a6c4 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h +++ b/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h @@ -43,6 +43,8 @@ namespace cms::alpakatools { constexpr inline AllocatorPolicy allocator_policy = #if !defined ALPAKA_DISABLE_CACHING_ALLOCATOR AllocatorPolicy::Caching; +#elif HIP_VERSION >= 50400000 && !defined ALPAKA_DISABLE_ASYNC_ALLOCATOR + AllocatorPolicy::Asynchronous; #else AllocatorPolicy::Synchronous; #endif diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h index 1a85eaaf3f4af..3f6ba61237448 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h @@ -159,11 +159,7 @@ namespace cms::alpakatools { }; //! The caching memory allocator implementation for the ROCm/HIP device - template >> + template struct CachedBufAlloc { template ALPAKA_FN_HOST static auto allocCachedBuf(alpaka::DevHipRt const& dev, TQueue queue, TExtent const& extent) diff --git a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc index bbb8bc55a8dc1..d06a6adc39168 100644 --- a/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc +++ b/HeterogeneousCore/AlpakaServices/src/alpaka/AlpakaService.cc @@ -19,6 +19,11 @@ #include "HeterogeneousCore/CUDAServices/interface/CUDAService.h" #endif // ALPAKA_ACC_GPU_CUDA_ENABLED +#ifdef ALPAKA_ACC_GPU_HIP_ENABLED +#include "FWCore/ServiceRegistry/interface/Service.h" +#include "HeterogeneousCore/ROCmServices/interface/ROCmService.h" +#endif // ALPAKA_ACC_GPU_HIP_ENABLED + namespace ALPAKA_ACCELERATOR_NAMESPACE { AlpakaService::AlpakaService(edm::ParameterSet const& config, edm::ActivityRegistry&) @@ -28,6 +33,10 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { // rely on the CUDAService to initialise the CUDA devices edm::Service cudaService; #endif // ALPAKA_ACC_GPU_CUDA_ENABLED +#ifdef ALPAKA_ACC_GPU_HIP_ENABLED + // rely on the ROCmService to initialise the ROCm devices + edm::Service rocmService; +#endif // ALPAKA_ACC_GPU_HIP_ENABLED // TODO from Andrea Bocci: // - handle alpaka caching allocators ? @@ -45,6 +54,13 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { return; } #endif // ALPAKA_ACC_GPU_CUDA_ENABLED +#ifdef ALPAKA_ACC_GPU_HIP_ENABLED + if (not rocmService->enabled()) { + enabled_ = false; + edm::LogInfo("AlpakaService") << ALPAKA_TYPE_ALIAS_NAME(AlpakaService) << " disabled by ROCmService"; + return; + } +#endif // ALPAKA_ACC_GPU_HIP_ENABLED // enumerate all devices on this platform auto const& devices = cms::alpakatools::devices(); From a14e8fc06624d86cc61232857b6ec0e286e81980 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Thu, 2 Feb 2023 18:40:44 +0100 Subject: [PATCH 3/3] Update code for Alpaka devel as of 2023.02.1 --- .../Portable/interface/PortableCollection.h | 4 +- .../interface/PortableDeviceCollection.h | 7 ++-- .../interface/PortableHostCollection.h | 5 ++- DataFormats/Portable/interface/Product.h | 5 ++- DataFormats/Portable/interface/ProductBase.h | 3 +- .../test/SoALayoutAndView_t.hip.cc | 6 +-- .../AlpakaCore/interface/ContextState.h | 2 +- .../AlpakaCore/interface/ScopedContext.h | 6 ++- .../AlpakaCore/interface/chooseDevice.h | 4 +- .../AlpakaCore/src/alpaka/EDMetadata.cc | 2 + .../interface/AllocatorPolicy.h | 2 +- .../interface/CachedBufAlloc.h | 4 +- .../interface/CachingAllocator.h | 2 +- .../interface/ScopedContextFwd.h | 14 +++---- .../AlpakaInterface/interface/devices.h | 5 +-- .../interface/getDeviceCachingAllocator.h | 6 ++- .../interface/getHostCachingAllocator.h | 4 +- .../AlpakaInterface/interface/memory.h | 35 +++++++++------- .../AlpakaInterface/interface/traits.h | 38 ----------------- .../AlpakaInterface/interface/vec.h | 42 ------------------- .../AlpakaInterface/interface/workdivision.h | 16 +++---- .../test/alpaka/testKernel.dev.cc | 1 - .../AlpakaInterface/test/alpaka/testVec.cc | 1 - .../AlpakaTest/plugins/alpaka/TestAlgo.dev.cc | 2 +- 24 files changed, 74 insertions(+), 142 deletions(-) delete mode 100644 HeterogeneousCore/AlpakaInterface/interface/vec.h diff --git a/DataFormats/Portable/interface/PortableCollection.h b/DataFormats/Portable/interface/PortableCollection.h index 068a8b86038c6..86d117c02c81d 100644 --- a/DataFormats/Portable/interface/PortableCollection.h +++ b/DataFormats/Portable/interface/PortableCollection.h @@ -6,13 +6,13 @@ namespace traits { // trait for a generic SoA-based product - template >> + template >> class PortableCollectionTrait; } // namespace traits // type alias for a generic SoA-based product -template >> +template >> using PortableCollection = typename traits::PortableCollectionTrait::CollectionType; #endif // DataFormats_Portable_interface_PortableCollection_h diff --git a/DataFormats/Portable/interface/PortableDeviceCollection.h b/DataFormats/Portable/interface/PortableDeviceCollection.h index 6b3a4f51eb964..84ed057e82f8c 100644 --- a/DataFormats/Portable/interface/PortableDeviceCollection.h +++ b/DataFormats/Portable/interface/PortableDeviceCollection.h @@ -5,12 +5,13 @@ #include #include +#include + #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" // generic SoA-based product in device memory -template >> +template >> class PortableDeviceCollection { static_assert(not std::is_same_v, "Use PortableHostCollection instead of PortableDeviceCollection"); @@ -32,7 +33,7 @@ class PortableDeviceCollection { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); } - template >> + template >> PortableDeviceCollection(int32_t elements, TQueue const& queue) : buffer_{cms::alpakatools::make_device_buffer(queue, Layout::computeDataSize(elements))}, layout_{buffer_->data(), elements}, diff --git a/DataFormats/Portable/interface/PortableHostCollection.h b/DataFormats/Portable/interface/PortableHostCollection.h index 0784bb83e3256..1a75b5d7e4b9c 100644 --- a/DataFormats/Portable/interface/PortableHostCollection.h +++ b/DataFormats/Portable/interface/PortableHostCollection.h @@ -4,10 +4,11 @@ #include #include +#include + #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/host.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" // generic SoA-based product in host memory template @@ -30,7 +31,7 @@ class PortableHostCollection { assert(reinterpret_cast(buffer_->data()) % Layout::alignment == 0); } - template >> + template >> PortableHostCollection(int32_t elements, TQueue const& queue) // allocate pinned host memory associated to the given work queue, accessible by the queue's device : buffer_{cms::alpakatools::make_host_buffer(queue, Layout::computeDataSize(elements))}, diff --git a/DataFormats/Portable/interface/Product.h b/DataFormats/Portable/interface/Product.h index 177c553b83d46..6b66d09060ab7 100644 --- a/DataFormats/Portable/interface/Product.h +++ b/DataFormats/Portable/interface/Product.h @@ -4,10 +4,11 @@ #include #include +#include + #include "DataFormats/Portable/interface/ProductBase.h" #include "HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" namespace edm { template @@ -30,7 +31,7 @@ namespace cms::alpakatools { * it. Here is a somewhat natural place. If the overhead is too much, we * can use them only where synchronization between queues is needed. */ - template >> + template >> class Product : public ProductBase { public: using Queue = TQueue; diff --git a/DataFormats/Portable/interface/ProductBase.h b/DataFormats/Portable/interface/ProductBase.h index 3ec5e4e554219..bb258af6570c0 100644 --- a/DataFormats/Portable/interface/ProductBase.h +++ b/DataFormats/Portable/interface/ProductBase.h @@ -8,7 +8,6 @@ #include #include "HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" namespace cms::alpakatools { @@ -16,7 +15,7 @@ namespace cms::alpakatools { * Base class for all instantiations of Product to hold the * non-T-dependent members. */ - template >> + template >> class ProductBase { public: using Queue = TQueue; diff --git a/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc b/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc index 3fd5dc72969c2..a70b9fb1d933b 100644 --- a/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc +++ b/DataFormats/SoATemplate/test/SoALayoutAndView_t.hip.cc @@ -103,7 +103,7 @@ using RangeCheckingHostDeviceView = // We expect to just run one thread. __global__ void rangeCheckKernel(RangeCheckingHostDeviceView soa) { - printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int) threadIdx.x); + printf("About to fail range-check (operator[]) in ROCm thread: %d\n", (int)threadIdx.x); [[maybe_unused]] auto si = soa[soa.metadata().size()]; printf("Fail: range-check failure should have stopped the kernel.\n"); } @@ -120,7 +120,7 @@ int main(void) { // Allocate buffer and store on host size_t hostDeviceSize = SoAHostDeviceLayout::computeDataSize(numElements); std::byte* h_buf = nullptr; - hipCheck(hipMallocHost((void**) &h_buf, hostDeviceSize)); + hipCheck(hipMallocHost((void**)&h_buf, hostDeviceSize)); SoAHostDeviceLayout h_soahdLayout(h_buf, numElements); SoAHostDeviceView h_soahd(h_soahdLayout); SoAHostDeviceConstView h_soahd_c(h_soahdLayout); @@ -128,7 +128,7 @@ int main(void) { // Alocate buffer, stores and views on the device (single, shared buffer). size_t deviceOnlySize = SoADeviceOnlyLayout::computeDataSize(numElements); std::byte* d_buf = nullptr; - hipCheck(hipMallocHost((void**) &d_buf, hostDeviceSize + deviceOnlySize)); + hipCheck(hipMallocHost((void**)&d_buf, hostDeviceSize + deviceOnlySize)); SoAHostDeviceLayout d_soahdLayout(d_buf, numElements); SoADeviceOnlyLayout d_soadoLayout(d_soahdLayout.metadata().nextByte(), numElements); SoAHostDeviceView d_soahdView(d_soahdLayout); diff --git a/HeterogeneousCore/AlpakaCore/interface/ContextState.h b/HeterogeneousCore/AlpakaCore/interface/ContextState.h index ff143b46c97f1..54617b05d889b 100644 --- a/HeterogeneousCore/AlpakaCore/interface/ContextState.h +++ b/HeterogeneousCore/AlpakaCore/interface/ContextState.h @@ -18,7 +18,7 @@ namespace cms::alpakatools { * information from ExternalWork's acquire() to producer() via a * member/QueueCache variable. */ - template >> + template >> class ContextState { public: using Queue = TQueue; diff --git a/HeterogeneousCore/AlpakaCore/interface/ScopedContext.h b/HeterogeneousCore/AlpakaCore/interface/ScopedContext.h index 2900e865d3b14..740414cd3bce2 100644 --- a/HeterogeneousCore/AlpakaCore/interface/ScopedContext.h +++ b/HeterogeneousCore/AlpakaCore/interface/ScopedContext.h @@ -5,6 +5,8 @@ #include #include +#include + #include "DataFormats/Portable/interface/Product.h" #include "FWCore/Concurrency/interface/WaitingTaskWithArenaHolder.h" #include "FWCore/Framework/interface/Event.h" @@ -99,7 +101,7 @@ namespace cms::alpakatools { ScopedContextHolderHelper(edm::WaitingTaskWithArenaHolder waitingTaskHolder) : waitingTaskHolder_{std::move(waitingTaskHolder)} {} - template >> + template >> void pushNextTask(F&& f, ContextState const* state) { replaceWaitingTaskHolder(edm::WaitingTaskWithArenaHolder{edm::make_waiting_task_with_holder( std::move(waitingTaskHolder_), [state, func = std::forward(f)](edm::WaitingTaskWithArenaHolder h) { @@ -111,7 +113,7 @@ namespace cms::alpakatools { waitingTaskHolder_ = std::move(waitingTaskHolder); } - template >> + template >> void enqueueCallback(TQueue& queue) { alpaka::enqueue(queue, alpaka::HostOnlyTask([holder = std::move(waitingTaskHolder_)]() { // The functor is required to be const, but the original waitingTaskHolder_ diff --git a/HeterogeneousCore/AlpakaCore/interface/chooseDevice.h b/HeterogeneousCore/AlpakaCore/interface/chooseDevice.h index 50eb29a36f4f2..cbe4295dfd3cc 100644 --- a/HeterogeneousCore/AlpakaCore/interface/chooseDevice.h +++ b/HeterogeneousCore/AlpakaCore/interface/chooseDevice.h @@ -1,6 +1,8 @@ #ifndef HeterogeneousCore_AlpakaCore_interface_chooseDevice_h #define HeterogeneousCore_AlpakaCore_interface_chooseDevice_h +#include + #include "FWCore/ServiceRegistry/interface/Service.h" #include "FWCore/Utilities/interface/StreamID.h" #include "HeterogeneousCore/AlpakaInterface/interface/config.h" @@ -10,7 +12,7 @@ namespace cms::alpakatools { - template >> + template >> alpaka::Dev const& chooseDevice(edm::StreamID id) { edm::Service service; if (not service->enabled()) { diff --git a/HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadata.cc b/HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadata.cc index fcb037f07c070..16ff44581586c 100644 --- a/HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadata.cc +++ b/HeterogeneousCore/AlpakaCore/src/alpaka/EDMetadata.cc @@ -1,3 +1,5 @@ +#include + #include "FWCore/Utilities/interface/EDMException.h" #include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDMetadata.h" diff --git a/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h b/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h index 099469f67a6c4..d5b932e773af0 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h +++ b/HeterogeneousCore/AlpakaInterface/interface/AllocatorPolicy.h @@ -13,7 +13,7 @@ namespace cms::alpakatools { // - Caching: (device and host) caching allocator enum class AllocatorPolicy { Synchronous = 0, Asynchronous = 1, Caching = 2 }; - template >> + template >> constexpr inline AllocatorPolicy allocator_policy = AllocatorPolicy::Synchronous; #if defined ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED || defined ALPAKA_ACC_CPU_B_TBB_T_SEQ_ENABLED diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h index 3f6ba61237448..05d9bf55b1bdf 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h @@ -18,7 +18,7 @@ namespace cms::alpakatools { typename TDev, typename TQueue, typename = void, - typename = std::enable_if_t and cms::alpakatools::is_queue_v>> + typename = std::enable_if_t and alpaka::isQueue>> struct CachedBufAlloc { static_assert(alpaka::meta::DependentFalseType::value, "This device does not support a caching allocator"); }; @@ -193,7 +193,7 @@ namespace cms::alpakatools { typename TExtent, typename TQueue, typename TDev, - typename = std::enable_if_t and cms::alpakatools::is_queue_v>> + typename = std::enable_if_t and alpaka::isQueue>> ALPAKA_FN_HOST auto allocCachedBuf(TDev const& dev, TQueue queue, TExtent const& extent = TExtent()) { return traits::CachedBufAlloc, TIdx, TDev, TQueue>::allocCachedBuf(dev, queue, extent); } diff --git a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h index 589950ae6c018..ba6405ecddf6f 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h @@ -84,7 +84,7 @@ namespace cms::alpakatools { template and cms::alpakatools::is_queue_v>> + typename = std::enable_if_t and alpaka::isQueue>> class CachingAllocator { public: #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED diff --git a/HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h b/HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h index 784561be2bfcb..64d699bdd34c7 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h +++ b/HeterogeneousCore/AlpakaInterface/interface/ScopedContextFwd.h @@ -1,7 +1,7 @@ #ifndef HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h #define HeterogeneousCore_AlpakaInterface_interface_ScopedContextFwd_h -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" +#include // Forward declaration of the alpaka framework Context classes // @@ -11,23 +11,23 @@ namespace cms::alpakatools { namespace impl { - template >> + template >> class ScopedContextBase; - template >> + template >> class ScopedContextGetterBase; } // namespace impl - template >> + template >> class ScopedContextAcquire; - template >> + template >> class ScopedContextProduce; - template >> + template >> class ScopedContextTask; - template >> + template >> class ScopedContextAnalyze; } // namespace cms::alpakatools diff --git a/HeterogeneousCore/AlpakaInterface/interface/devices.h b/HeterogeneousCore/AlpakaInterface/interface/devices.h index 7787f23d2d978..cfe907a76ac7c 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/devices.h +++ b/HeterogeneousCore/AlpakaInterface/interface/devices.h @@ -7,13 +7,12 @@ #include #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "HeterogeneousCore/AlpakaInterface/interface/traits.h" namespace cms::alpakatools { namespace detail { - template >> + template >> inline std::vector> enumerate_devices() { using Platform = TPlatform; using Device = alpaka::Dev; @@ -32,7 +31,7 @@ namespace cms::alpakatools { } // namespace detail // return the alpaka accelerator devices for the given platform - template >> + template >> inline std::vector> const& devices() { static const auto devices = detail::enumerate_devices(); return devices; diff --git a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h index f47e7637fa2de..06b4453a68502 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/getDeviceCachingAllocator.h @@ -4,6 +4,8 @@ #include #include +#include + #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h" #include "HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h" @@ -16,7 +18,7 @@ namespace cms::alpakatools { template and cms::alpakatools::is_queue_v>> + typename = std::enable_if_t and alpaka::isQueue>> auto allocate_device_allocators() { using Allocator = CachingAllocator; auto const& devices = cms::alpakatools::devices>(); @@ -72,7 +74,7 @@ namespace cms::alpakatools { template and cms::alpakatools::is_queue_v>> + typename = std::enable_if_t and alpaka::isQueue>> inline CachingAllocator& getDeviceCachingAllocator(TDev const& device) { // initialise all allocators, one per device CMS_THREAD_SAFE static auto allocators = detail::allocate_device_allocators(); diff --git a/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h b/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h index 78d42a4de938a..0950906f67737 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h +++ b/HeterogeneousCore/AlpakaInterface/interface/getHostCachingAllocator.h @@ -1,6 +1,8 @@ #ifndef HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h #define HeterogeneousCore_AlpakaInterface_interface_getHostCachingAllocator_h +#include + #include "FWCore/Utilities/interface/thread_safety_macros.h" #include "HeterogeneousCore/AlpakaInterface/interface/AllocatorConfig.h" #include "HeterogeneousCore/AlpakaInterface/interface/CachingAllocator.h" @@ -10,7 +12,7 @@ namespace cms::alpakatools { - template >> + template >> inline CachingAllocator& getHostCachingAllocator() { // thread safe initialisation of the host allocator CMS_THREAD_SAFE static CachingAllocator allocator( diff --git a/HeterogeneousCore/AlpakaInterface/interface/memory.h b/HeterogeneousCore/AlpakaInterface/interface/memory.h index 193b5016ff1d0..757f3fa048b98 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/memory.h +++ b/HeterogeneousCore/AlpakaInterface/interface/memory.h @@ -18,7 +18,7 @@ namespace cms::alpakatools { // type deduction helpers namespace detail { - template >> + template >> struct buffer_type { using type = alpaka::Buf; }; @@ -33,7 +33,7 @@ namespace cms::alpakatools { using type = alpaka::Buf; }; - template >> + template >> struct view_type { using type = alpaka::ViewPlainPtr; }; @@ -101,7 +101,8 @@ namespace cms::alpakatools { // the memory is pinned according to the device associated to the queue template - std::enable_if_t and not std::is_array_v, host_buffer> make_host_buffer(TQueue const& queue) { + std::enable_if_t and not std::is_array_v, host_buffer> make_host_buffer( + TQueue const& queue) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { return allocCachedBuf(host(), queue, Scalar{}); } else { @@ -110,7 +111,8 @@ namespace cms::alpakatools { } template - std::enable_if_t and cms::is_unbounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_unbounded_array_v and + not std::is_array_v>, host_buffer> make_host_buffer(TQueue const& queue, Extent extent) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { @@ -122,7 +124,8 @@ namespace cms::alpakatools { } template - std::enable_if_t and cms::is_bounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_bounded_array_v and + not std::is_array_v>, host_buffer> make_host_buffer(TQueue const& queue) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { @@ -162,29 +165,31 @@ namespace cms::alpakatools { // scalar and 1-dimensional device buffers - template >> + template >> using device_buffer = typename detail::buffer_type::type; - template >> + template >> using const_device_buffer = alpaka::ViewConst>; // non-cached, scalar and 1-dimensional device buffers template - std::enable_if_t and not std::is_array_v, device_buffer> make_device_buffer( + std::enable_if_t and not std::is_array_v, device_buffer> make_device_buffer( TDev const& device) { return alpaka::allocBuf(device, Scalar{}); } template - std::enable_if_t and cms::is_unbounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_unbounded_array_v and + not std::is_array_v>, device_buffer> make_device_buffer(TDev const& device, Extent extent) { return alpaka::allocBuf, Idx>(device, Vec1D{extent}); } template - std::enable_if_t and cms::is_bounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_bounded_array_v and + not std::is_array_v>, device_buffer> make_device_buffer(TDev const& device) { return alpaka::allocBuf, Idx>(device, Vec1D{std::extent_v}); @@ -193,7 +198,7 @@ namespace cms::alpakatools { // potentially-cached, scalar and 1-dimensional device buffers with queue-ordered semantic template - std::enable_if_t and not std::is_array_v, device_buffer, T>> + std::enable_if_t and not std::is_array_v, device_buffer, T>> make_device_buffer(TQueue const& queue) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { return allocCachedBuf(alpaka::getDev(queue), queue, Scalar{}); @@ -207,7 +212,8 @@ namespace cms::alpakatools { } template - std::enable_if_t and cms::is_unbounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_unbounded_array_v and + not std::is_array_v>, device_buffer, T>> make_device_buffer(TQueue const& queue, Extent extent) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { @@ -222,7 +228,8 @@ namespace cms::alpakatools { } template - std::enable_if_t and cms::is_bounded_array_v and not std::is_array_v>, + std::enable_if_t and cms::is_bounded_array_v and + not std::is_array_v>, device_buffer, T>> make_device_buffer(TQueue const& queue) { if constexpr (allocator_policy> == AllocatorPolicy::Caching) { @@ -238,7 +245,7 @@ namespace cms::alpakatools { // scalar and 1-dimensional device views - template >> + template >> using device_view = typename detail::view_type::type; template diff --git a/HeterogeneousCore/AlpakaInterface/interface/traits.h b/HeterogeneousCore/AlpakaInterface/interface/traits.h index 3083cda79833d..c469daf60bd92 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/traits.h +++ b/HeterogeneousCore/AlpakaInterface/interface/traits.h @@ -25,42 +25,4 @@ namespace cms { inline constexpr bool is_unbounded_array_v = is_unbounded_array::value; } // namespace cms -#include - -namespace cms::alpakatools { - - // is_platform - - template - using is_platform = alpaka::concepts::ImplementsConcept; - - template - inline constexpr bool is_platform_v = is_platform::value; - - // is_device - - template - using is_device = alpaka::concepts::ImplementsConcept; - - template - inline constexpr bool is_device_v = is_device::value; - - // is_accelerator - - template - using is_accelerator = alpaka::concepts::ImplementsConcept; - - template - inline constexpr bool is_accelerator_v = is_accelerator::value; - - // is_queue - - template - using is_queue = alpaka::concepts::ImplementsConcept; - - template - inline constexpr bool is_queue_v = is_queue::value; - -} // namespace cms::alpakatools - #endif // HeterogeneousCore_AlpakaInterface_interface_traits_h diff --git a/HeterogeneousCore/AlpakaInterface/interface/vec.h b/HeterogeneousCore/AlpakaInterface/interface/vec.h deleted file mode 100644 index 4126eecf78cf2..0000000000000 --- a/HeterogeneousCore/AlpakaInterface/interface/vec.h +++ /dev/null @@ -1,42 +0,0 @@ -#ifndef HeterogeneousCore_AlpakaInterface_interface_vec_h -#define HeterogeneousCore_AlpakaInterface_interface_vec_h - -#include - -#include - -namespace alpaka { - - //! \return The element-wise minimum of one or more vectors. - ALPAKA_NO_HOST_ACC_WARNING - template , Vecs> && ...)>> - ALPAKA_FN_HOST_ACC constexpr auto elementwise_min(Vec const& p, Vecs const&... qs) -> Vec { - Vec r; - if constexpr (TDim::value > 0) { - for (typename TDim::value_type i = 0; i < TDim::value; ++i) - r[i] = std::min({p[i], qs[i]...}); - } - return r; - } - - //! \return The element-wise maximum of one or more vectors. - ALPAKA_NO_HOST_ACC_WARNING - template , Vecs> && ...)>> - ALPAKA_FN_HOST_ACC constexpr auto elementwise_max(Vec const& p, Vecs const&... qs) -> Vec { - Vec r; - if constexpr (TDim::value > 0) { - for (typename TDim::value_type i = 0; i < TDim::value; ++i) - r[i] = std::max({p[i], qs[i]...}); - } - return r; - } - -} // namespace alpaka - -#endif // HeterogeneousCore_AlpakaInterface_interface_vec_h diff --git a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h index 7ed55e7d60ebe..0d295855976da 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/workdivision.h +++ b/HeterogeneousCore/AlpakaInterface/interface/workdivision.h @@ -7,7 +7,6 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/traits.h" -#include "HeterogeneousCore/AlpakaInterface/interface/vec.h" namespace cms::alpakatools { @@ -20,7 +19,7 @@ namespace cms::alpakatools { inline constexpr Idx divide_up_by(Idx value, Idx divisor) { return (value + divisor - 1) / divisor; } // Trait describing whether or not the accelerator expects the threads-per-block and elements-per-thread to be swapped - template >> + template >> struct requires_single_thread_per_block : public std::true_type {}; #ifdef ALPAKA_ACC_GPU_CUDA_ENABLED @@ -34,12 +33,11 @@ namespace cms::alpakatools { #endif // ALPAKA_ACC_GPU_HIP_ENABLED // Whether or not the accelerator expects the threads-per-block and elements-per-thread to be swapped - template >> + template >> inline constexpr bool requires_single_thread_per_block_v = requires_single_thread_per_block::value; // Create an accelerator-dependent work division for 1-dimensional kernels - template and alpaka::Dim::value == 1>> + template and alpaka::Dim::value == 1>> inline WorkDiv make_workdiv(Idx blocks, Idx elements) { if constexpr (not requires_single_thread_per_block_v) { // On GPU backends, each thread is looking at a single element: @@ -55,7 +53,7 @@ namespace cms::alpakatools { } // Create the accelerator-dependent workdiv for N-dimensional kernels - template >> + template >> inline WorkDiv> make_workdiv(const Vec>& blocks, const Vec>& elements) { using Dim = alpaka::Dim; @@ -72,8 +70,7 @@ namespace cms::alpakatools { } } - template and alpaka::Dim::value == 1>> + template and alpaka::Dim::value == 1>> class elements_with_stride { public: ALPAKA_FN_ACC inline elements_with_stride(TAcc const& acc) @@ -160,8 +157,7 @@ namespace cms::alpakatools { const Idx extent_; }; - template and (alpaka::Dim::value > 0)>> + template and (alpaka::Dim::value > 0)>> class elements_with_stride_nd { public: using Dim = alpaka::Dim; diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc index a1125435e7440..ee3f0e2844acd 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testKernel.dev.cc @@ -8,7 +8,6 @@ #include "HeterogeneousCore/AlpakaInterface/interface/config.h" #include "HeterogeneousCore/AlpakaInterface/interface/memory.h" -#include "HeterogeneousCore/AlpakaInterface/interface/vec.h" #include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" // each test binary is built for a single Alpaka backend diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testVec.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testVec.cc index 0cae50ba0e5e5..ed7b29824431b 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testVec.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testVec.cc @@ -4,7 +4,6 @@ #include #include "HeterogeneousCore/AlpakaInterface/interface/config.h" -#include "HeterogeneousCore/AlpakaInterface/interface/vec.h" static constexpr auto s_tag = "[" ALPAKA_TYPE_ALIAS_NAME(alpakaTestVec) "]"; diff --git a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc index 7f94713470e5a..6bdb36e0e57a3 100644 --- a/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc +++ b/HeterogeneousCore/AlpakaTest/plugins/alpaka/TestAlgo.dev.cc @@ -18,7 +18,7 @@ namespace ALPAKA_ACCELERATOR_NAMESPACE { class TestAlgoKernel { public: - template >> + template >> ALPAKA_FN_ACC void operator()(TAcc const& acc, portabletest::TestDeviceCollection::View view, int32_t size,