From 42794f5a294f08faf9c72ec1cfd6d64827c47685 Mon Sep 17 00:00:00 2001 From: Andrea Bocci Date: Tue, 23 Jan 2024 15:07:39 +0100 Subject: [PATCH] Update prefixScan, OneToManyAssoc and HistoContainer for Alpaka 1.1.0 --- .../interface/HistoContainer.h | 32 +++++++------- .../interface/OneToManyAssoc.h | 42 +++++++++---------- .../AlpakaInterface/interface/prefixScan.h | 24 +++++------ .../AlpakaInterface/interface/radixSort.h | 10 ++--- .../test/alpaka/testHistoContainer.dev.cc | 24 +++++------ .../test/alpaka/testOneHistoContainer.dev.cc | 26 ++++++------ .../test/alpaka/testOneToManyAssoc.dev.cc | 20 ++++----- .../test/alpaka/testPrefixScan.dev.cc | 26 ++++++------ 8 files changed, 102 insertions(+), 102 deletions(-) diff --git a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h index 9535abad90c01..b122ccc5a54ee 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h +++ b/HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h @@ -27,10 +27,10 @@ namespace cms::alpakatools { const uint32_t nt = offsets[nh]; for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); - ALPAKA_ASSERT_OFFLOAD((*off) > 0); + ALPAKA_ASSERT_ACC((*off) > 0); int32_t ih = off - offsets - 1; - ALPAKA_ASSERT_OFFLOAD(ih >= 0); - ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); + ALPAKA_ASSERT_ACC(ih >= 0); + ALPAKA_ASSERT_ACC(ih < int(nh)); h->count(acc, v[i], ih); } } @@ -46,10 +46,10 @@ namespace cms::alpakatools { const uint32_t nt = offsets[nh]; for (uint32_t i : uniform_elements(acc, nt)) { auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i); - ALPAKA_ASSERT_OFFLOAD((*off) > 0); + ALPAKA_ASSERT_ACC((*off) > 0); int32_t ih = off - offsets - 1; - ALPAKA_ASSERT_OFFLOAD(ih >= 0); - ALPAKA_ASSERT_OFFLOAD(ih < int(nh)); + ALPAKA_ASSERT_ACC(ih >= 0); + ALPAKA_ASSERT_ACC(ih < int(nh)); h->fill(acc, v[i], i, ih); } } @@ -102,7 +102,7 @@ namespace cms::alpakatools { int bs = Hist::bin(value); int be = std::min(int(Hist::nbins() - 1), bs + n); bs = std::max(0, bs - n); - ALPAKA_ASSERT_OFFLOAD(be >= bs); + ALPAKA_ASSERT_ACC(be >= bs); for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { func(*pj); } @@ -113,7 +113,7 @@ namespace cms::alpakatools { ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) { auto bs = Hist::bin(wmin); auto be = Hist::bin(wmax); - ALPAKA_ASSERT_OFFLOAD(be >= bs); + ALPAKA_ASSERT_ACC(be >= bs); for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) { func(*pj); } @@ -164,36 +164,36 @@ namespace cms::alpakatools { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); Base::atomicIncrement(acc, this->off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); auto w = Base::atomicDecrement(acc, this->off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); this->content[w - 1] = j; } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); b += histOff(nh); - ALPAKA_ASSERT_OFFLOAD(b < totbins()); + ALPAKA_ASSERT_ACC(b < totbins()); Base::atomicIncrement(acc, this->off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) { uint32_t b = bin(t); - ALPAKA_ASSERT_OFFLOAD(b < nbins()); + ALPAKA_ASSERT_ACC(b < nbins()); b += histOff(nh); - ALPAKA_ASSERT_OFFLOAD(b < totbins()); + ALPAKA_ASSERT_ACC(b < totbins()); auto w = Base::atomicDecrement(acc, this->off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); this->content[w - 1] = j; } }; diff --git a/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h b/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h index 866564d3f896e..a914f0989dc88 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h +++ b/HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h @@ -42,15 +42,15 @@ namespace cms::alpakatools { constexpr auto capacity() const { return content.capacity(); } ALPAKA_FN_HOST_ACC void initStorage(View view) { - ALPAKA_ASSERT_OFFLOAD(view.assoc == this); + ALPAKA_ASSERT_ACC(view.assoc == this); if constexpr (ctCapacity() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.contentStorage); - ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0); + ALPAKA_ASSERT_ACC(view.contentStorage); + ALPAKA_ASSERT_ACC(view.contentSize > 0); content.init(view.contentStorage, view.contentSize); } if constexpr (ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); off.init(view.offStorage, view.offSize); } } @@ -80,15 +80,15 @@ namespace cms::alpakatools { template ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) { - ALPAKA_ASSERT_OFFLOAD(b < static_cast(nOnes())); + ALPAKA_ASSERT_ACC(b < static_cast(nOnes())); atomicIncrement(acc, off[b]); } template ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) { - ALPAKA_ASSERT_OFFLOAD(b < static_cast(nOnes())); + ALPAKA_ASSERT_ACC(b < static_cast(nOnes())); auto w = atomicDecrement(acc, off[b]); - ALPAKA_ASSERT_OFFLOAD(w > 0); + ALPAKA_ASSERT_ACC(w > 0); content[w - 1] = j; } @@ -96,8 +96,8 @@ namespace cms::alpakatools { struct zeroAndInit { template ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const { - ALPAKA_ASSERT_OFFLOAD((1 == alpaka::getWorkDiv(acc)[0])); - ALPAKA_ASSERT_OFFLOAD((0 == alpaka::getIdx(acc)[0])); + ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv(acc)[0])); + ALPAKA_ASSERT_ACC((0 == alpaka::getIdx(acc)[0])); auto h = view.assoc; if (cms::alpakatools::once_per_block(acc)) { h->psws = 0; @@ -119,12 +119,12 @@ namespace cms::alpakatools { template ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) { if constexpr (ctCapacity() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.contentStorage); - ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0); + ALPAKA_ASSERT_ACC(view.contentStorage); + ALPAKA_ASSERT_ACC(view.contentSize > 0); } if constexpr (ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); } if constexpr (!requires_single_thread_per_block_v) { auto nthreads = 1024; @@ -133,7 +133,7 @@ namespace cms::alpakatools { alpaka::exec(queue, workDiv, zeroAndInit{}, view); } else { auto h = view.assoc; - ALPAKA_ASSERT_OFFLOAD(h); + ALPAKA_ASSERT_ACC(h); h->initStorage(view); h->zero(); h->psws = 0; @@ -213,9 +213,9 @@ namespace cms::alpakatools { template ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) { - ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == 0); + ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0); blockPrefixScan(acc, this->off.data(), this->totOnes(), ws); - ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]); + ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]); } ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() { @@ -234,17 +234,17 @@ namespace cms::alpakatools { ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) { // View stores a base pointer, we need to upcast back... auto h = static_cast(view.assoc); - ALPAKA_ASSERT_OFFLOAD(h); + ALPAKA_ASSERT_ACC(h); if constexpr (!requires_single_thread_per_block_v) { Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off)); auto nOnes = OneToManyAssocRandomAccess::ctNOnes(); if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) { - ALPAKA_ASSERT_OFFLOAD(view.offStorage); - ALPAKA_ASSERT_OFFLOAD(view.offSize > 0); + ALPAKA_ASSERT_ACC(view.offStorage); + ALPAKA_ASSERT_ACC(view.offSize > 0); nOnes = view.offSize; poff = view.offStorage; } - ALPAKA_ASSERT_OFFLOAD(nOnes > 0); + ALPAKA_ASSERT_ACC(nOnes > 0); int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws)); auto nthreads = 1024; auto nblocks = (nOnes + nthreads - 1) / nthreads; diff --git a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h index 5af78500f3ca3..afaddcc7f5473 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h +++ b/HeterogeneousCore/AlpakaInterface/interface/prefixScan.h @@ -50,11 +50,11 @@ namespace cms::alpakatools { const auto warpSize = alpaka::warp::getSize(acc); int32_t const blockDimension(alpaka::getWorkDiv(acc)[0u]); int32_t const blockThreadIdx(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(ws); - ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize); - ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize); + ALPAKA_ASSERT_ACC(ws); + ALPAKA_ASSERT_ACC(size <= warpSize * warpSize); + ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize); auto first = blockThreadIdx; - ALPAKA_ASSERT_OFFLOAD(isPowerOf2(warpSize)); + ALPAKA_ASSERT_ACC(isPowerOf2(warpSize)); auto laneId = blockThreadIdx & (warpSize - 1); auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize; @@ -64,7 +64,7 @@ namespace cms::alpakatools { if (i < size) { // Skipped in warp padding threads. auto warpId = i / warpSize; - ALPAKA_ASSERT_OFFLOAD(warpId < warpSize); + ALPAKA_ASSERT_ACC(warpId < warpSize); if ((warpSize - 1) == laneId) ws[warpId] = co[i]; } @@ -97,9 +97,9 @@ namespace cms::alpakatools { const auto warpSize = alpaka::warp::getSize(acc); int32_t const blockDimension(alpaka::getWorkDiv(acc)[0u]); int32_t const blockThreadIdx(alpaka::getIdx(acc)[0u]); - ALPAKA_ASSERT_OFFLOAD(ws); - ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize); - ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize); + ALPAKA_ASSERT_ACC(ws); + ALPAKA_ASSERT_ACC(size <= warpSize * warpSize); + ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize); auto first = blockThreadIdx; auto laneId = blockThreadIdx & (warpSize - 1); auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize; @@ -110,7 +110,7 @@ namespace cms::alpakatools { if (i < size) { // Skipped in warp padding threads. auto warpId = i / warpSize; - ALPAKA_ASSERT_OFFLOAD(warpId < warpSize); + ALPAKA_ASSERT_ACC(warpId < warpSize); if ((warpSize - 1) == laneId) ws[warpId] = c[i]; } @@ -144,14 +144,14 @@ namespace cms::alpakatools { if constexpr (!requires_single_thread_per_block_v) { ws = alpaka::getDynSharedMem(acc); } - ALPAKA_ASSERT_OFFLOAD(warpSize == static_cast(alpaka::warp::getSize(acc))); + ALPAKA_ASSERT_ACC(warpSize == static_cast(alpaka::warp::getSize(acc))); [[maybe_unused]] const auto elementsPerGrid = alpaka::getWorkDiv(acc)[0u]; const auto elementsPerBlock = alpaka::getWorkDiv(acc)[0u]; const auto threadsPerBlock = alpaka::getWorkDiv(acc)[0u]; const auto blocksPerGrid = alpaka::getWorkDiv(acc)[0u]; const auto blockIdx = alpaka::getIdx(acc)[0u]; const auto threadIdx = alpaka::getIdx(acc)[0u]; - ALPAKA_ASSERT_OFFLOAD(elementsPerGrid >= size); + ALPAKA_ASSERT_ACC(elementsPerGrid >= size); // first each block does a scan [[maybe_unused]] int off = elementsPerBlock * blockIdx; if (size - off > 0) { @@ -172,7 +172,7 @@ namespace cms::alpakatools { if (!isLastBlockDone) return; - ALPAKA_ASSERT_OFFLOAD(int(blocksPerGrid) == *pc); + ALPAKA_ASSERT_ACC(int(blocksPerGrid) == *pc); // good each block has done its work and now we are left in last block diff --git a/HeterogeneousCore/AlpakaInterface/interface/radixSort.h b/HeterogeneousCore/AlpakaInterface/interface/radixSort.h index 0f94ad200efd9..f9b26cf3d17ae 100644 --- a/HeterogeneousCore/AlpakaInterface/interface/radixSort.h +++ b/HeterogeneousCore/AlpakaInterface/interface/radixSort.h @@ -132,9 +132,9 @@ namespace cms::alpakatools { auto& ibs = alpaka::declareSharedVar(acc); auto& currentSortingPass = alpaka::declareSharedVar(acc); - ALPAKA_ASSERT_OFFLOAD(size > 0); + ALPAKA_ASSERT_ACC(size > 0); // TODO: is this a hard requirement? - ALPAKA_ASSERT_OFFLOAD(blockDimension >= binsNumber); + ALPAKA_ASSERT_ACC(blockDimension >= binsNumber); currentSortingPass = initialSortingPass; @@ -283,7 +283,7 @@ namespace cms::alpakatools { */ alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(c[0] == 0); + ALPAKA_ASSERT_ACC(c[0] == 0); // swap (local, ok) auto t = j; @@ -297,8 +297,8 @@ namespace cms::alpakatools { } if ((dataBits != 8) && (0 == (NS & 1))) - ALPAKA_ASSERT_OFFLOAD(j == - ind); // dataBits/binBits is even so ind is correct (the result is in the right location) + ALPAKA_ASSERT_ACC(j == + ind); // dataBits/binBits is even so ind is correct (the result is in the right location) // TODO this copy is (doubly?) redundant with the reorder if (j != ind) // odd number of sorting passes, we need to move the result to the right array (ind[]) diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc index 57b80cc9cf275..026c4bc5866e2 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testHistoContainer.dev.cc @@ -38,12 +38,12 @@ void checkContents(Hist* h, #ifndef NDEBUG [[maybe_unused]] auto bk = h->bin(v[k]); #endif - ALPAKA_ASSERT_OFFLOAD(bk == i); - ALPAKA_ASSERT_OFFLOAD(k < offsets[j + 1]); + ALPAKA_ASSERT_ACC(bk == i); + ALPAKA_ASSERT_ACC(k < offsets[j + 1]); auto kl = h->bin(v[k] - window); auto kh = h->bin(v[k] + window); - ALPAKA_ASSERT_OFFLOAD(kl != i); - ALPAKA_ASSERT_OFFLOAD(kh != i); + ALPAKA_ASSERT_ACC(kl != i); + ALPAKA_ASSERT_ACC(kh != i); // std::cout << kl << ' ' << kh << std::endl; auto me = v[k]; @@ -81,7 +81,7 @@ void checkContents(Hist* h, std::cout << "what? " << j << ' ' << i << ' ' << int(me) << '/' << (int)T(me - window) << '/' << (int)T(me + window) << ": " << kl << '/' << kh << ' ' << khh << ' ' << tot << '/' << nm << std::endl; - ALPAKA_ASSERT_OFFLOAD(!l); + ALPAKA_ASSERT_ACC(!l); } } int status; @@ -133,7 +133,7 @@ int go(const DevHost& host, const Device& device, Queue& queue) { offsets[0] = 0; for (uint32_t j = 1; j < nParts + 1; ++j) { offsets[j] = offsets[j - 1] + partSize - 3 * j; - ALPAKA_ASSERT_OFFLOAD(offsets[j] <= N); + ALPAKA_ASSERT_ACC(offsets[j] <= N); } if (it == 1) { // special cases... @@ -210,14 +210,14 @@ int go(const DevHost& host, const Device& device, Queue& queue) { // std::cout << offsets[i] <<" - "<< h->size() << std::endl; // } - ALPAKA_ASSERT_OFFLOAD(0 == h->off[0]); - ALPAKA_ASSERT_OFFLOAD(offsets[10] == h->size()); - ALPAKA_ASSERT_OFFLOAD(0 == hr->off[0]); - ALPAKA_ASSERT_OFFLOAD(offsets[10] == hr->size()); + ALPAKA_ASSERT_ACC(0 == h->off[0]); + ALPAKA_ASSERT_ACC(offsets[10] == h->size()); + ALPAKA_ASSERT_ACC(0 == hr->off[0]); + ALPAKA_ASSERT_ACC(offsets[10] == hr->size()); auto verify = [&](uint32_t i, uint32_t k, uint32_t t1, uint32_t t2) { - ALPAKA_ASSERT_OFFLOAD(t1 < N); - ALPAKA_ASSERT_OFFLOAD(t2 < N); + ALPAKA_ASSERT_ACC(t1 < N); + ALPAKA_ASSERT_ACC(t2 < N); if (T(v[t1] - v[t2]) <= 0) std::cout << "for " << i << ':' << v[k] << " failed " << v[t1] << ' ' << v[t2] << std::endl; }; diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc index b032939f9870b..20639da606d0d 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneHistoContainer.dev.cc @@ -15,8 +15,8 @@ template struct mykernel { template ALPAKA_FN_ACC void operator()(const TAcc& acc, T const* __restrict__ v, uint32_t N) const { - ALPAKA_ASSERT_OFFLOAD(v); - ALPAKA_ASSERT_OFFLOAD(N == 12000); + ALPAKA_ASSERT_ACC(v); + ALPAKA_ASSERT_ACC(N == 12000); const uint32_t threadIdxLocal(alpaka::getIdx(acc)[0u]); if (threadIdxLocal == 0) { @@ -46,18 +46,18 @@ struct mykernel { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(0 == hist.size()); + ALPAKA_ASSERT_ACC(0 == hist.size()); alpaka::syncBlockThreads(acc); // finalize hist.finalize(acc, ws); alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(N == hist.size()); + ALPAKA_ASSERT_ACC(N == hist.size()); // verify for ([[maybe_unused]] auto j : uniform_elements(acc, Hist::nbins())) { - ALPAKA_ASSERT_OFFLOAD(hist.off[j] <= hist.off[j + 1]); + ALPAKA_ASSERT_ACC(hist.off[j] <= hist.off[j + 1]); } alpaka::syncBlockThreads(acc); @@ -72,17 +72,17 @@ struct mykernel { } alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(0 == hist.off[0]); - ALPAKA_ASSERT_OFFLOAD(N == hist.size()); + ALPAKA_ASSERT_ACC(0 == hist.off[0]); + ALPAKA_ASSERT_ACC(N == hist.size()); // bin #ifndef NDEBUG for (auto j : uniform_elements(acc, hist.size() - 1)) { auto p = hist.begin() + j; - ALPAKA_ASSERT_OFFLOAD((*p) < N); + ALPAKA_ASSERT_ACC((*p) < N); [[maybe_unused]] auto k1 = Hist::bin(v[*p]); [[maybe_unused]] auto k2 = Hist::bin(v[*(p + 1)]); - ALPAKA_ASSERT_OFFLOAD(k2 >= k1); + ALPAKA_ASSERT_ACC(k2 >= k1); } #endif @@ -95,13 +95,13 @@ struct mykernel { #endif [[maybe_unused]] int tot = 0; auto ftest = [&](unsigned int k) { - ALPAKA_ASSERT_OFFLOAD(k < N); + ALPAKA_ASSERT_ACC(k < N); ++tot; }; forEachInWindow(hist, v[j], v[j], ftest); #ifndef NDEBUG [[maybe_unused]] int rtot = hist.size(b0); - ALPAKA_ASSERT_OFFLOAD(tot == rtot); + ALPAKA_ASSERT_ACC(tot == rtot); #endif tot = 0; auto vm = int(v[j]) - DELTA; @@ -111,13 +111,13 @@ struct mykernel { vm = std::min(vm, vmax); vp = std::min(vp, vmax); vp = std::max(vp, 0); - ALPAKA_ASSERT_OFFLOAD(vp >= vm); + ALPAKA_ASSERT_ACC(vp >= vm); forEachInWindow(hist, vm, vp, ftest); #ifndef NDEBUG int bp = Hist::bin(vp); int bm = Hist::bin(vm); rtot = hist.end(bp) - hist.begin(bm); - ALPAKA_ASSERT_OFFLOAD(tot == rtot); + ALPAKA_ASSERT_ACC(tot == rtot); #endif } } diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc index 492911e6b1a57..d50b7830ad8f3 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testOneToManyAssoc.dev.cc @@ -69,7 +69,7 @@ struct verifyMulti { template ALPAKA_FN_ACC void operator()(const TAcc& acc, Multiplicity* __restrict__ m1, Multiplicity* __restrict__ m2) const { for ([[maybe_unused]] auto i : uniform_elements(acc, Multiplicity{}.totOnes())) { - ALPAKA_ASSERT_OFFLOAD(m1->off[i] == m2->off[i]); + ALPAKA_ASSERT_ACC(m1->off[i] == m2->off[i]); } } }; @@ -83,7 +83,7 @@ struct count { for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; - ALPAKA_ASSERT_OFFLOAD(j < 4); + ALPAKA_ASSERT_ACC(j < 4); if (k >= n) { return; } @@ -103,7 +103,7 @@ struct fill { for (auto i : uniform_elements(acc, 4 * n)) { auto k = i / 4; auto j = i - 4 * k; - ALPAKA_ASSERT_OFFLOAD(j < 4); + ALPAKA_ASSERT_ACC(j < 4); if (k >= n) { return; } @@ -117,7 +117,7 @@ struct fill { struct verify { template ALPAKA_FN_ACC void operator()(const TAcc& acc, Assoc* __restrict__ assoc) const { - ALPAKA_ASSERT_OFFLOAD(assoc->size() < Assoc{}.capacity()); + ALPAKA_ASSERT_ACC(assoc->size() < Assoc{}.capacity()); } }; @@ -138,7 +138,7 @@ struct verifyBulk { if (::toSigned(apc->get().first) >= Assoc::ctNOnes()) { printf("Overflow %d %d\n", apc->get().first, Assoc::ctNOnes()); } - ALPAKA_ASSERT_OFFLOAD(toSigned(assoc->size()) < Assoc::ctCapacity()); + ALPAKA_ASSERT_ACC(toSigned(assoc->size()) < Assoc::ctCapacity()); } }; @@ -197,8 +197,8 @@ int main() { } ++z; } - ALPAKA_ASSERT_OFFLOAD(n <= MaxElem); - ALPAKA_ASSERT_OFFLOAD(j <= N); + ALPAKA_ASSERT_ACC(n <= MaxElem); + ALPAKA_ASSERT_ACC(j <= N); } std::cout << "filled with " << n << " elements " << double(ave) / n << ' ' << imax << ' ' << nz << std::endl; @@ -239,7 +239,7 @@ int main() { ave += x; imax = std::max(imax, int(x)); } - ALPAKA_ASSERT_OFFLOAD(0 == ara_h->size(n)); + ALPAKA_ASSERT_ACC(0 == ara_h->size(n)); std::cout << "found with " << n << " elements " << double(ave) / n << ' ' << imax << ' ' << z << std::endl; // now the inverse map (actually this is the direct....) @@ -289,11 +289,11 @@ int main() { if (!(x == 4 || x == 3)) { std::cout << "i=" << i << " x=" << x << std::endl; } - ALPAKA_ASSERT_OFFLOAD(x == 4 || x == 3); + ALPAKA_ASSERT_ACC(x == 4 || x == 3); ave += x; imax = std::max(imax, int(x)); } - ALPAKA_ASSERT_OFFLOAD(0 == as_h->size(N)); + ALPAKA_ASSERT_ACC(0 == as_h->size(N)); std::cout << "found with ave occupancy " << double(ave) / N << ' ' << imax << std::endl; // here verify use of block local counters diff --git a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc index 5e8f4ee3b8e9a..d96d5d5b9b403 100644 --- a/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc +++ b/HeterogeneousCore/AlpakaInterface/test/alpaka/testPrefixScan.dev.cc @@ -43,8 +43,8 @@ struct testPrefixScan { blockPrefixScan(acc, c, co, size, ws); blockPrefixScan(acc, c, size, ws); - ALPAKA_ASSERT_OFFLOAD(1 == c[0]); - ALPAKA_ASSERT_OFFLOAD(1 == co[0]); + ALPAKA_ASSERT_ACC(1 == c[0]); + ALPAKA_ASSERT_ACC(1 == co[0]); // TODO: not needed? Not in multi kernel version, not in CUDA version alpaka::syncBlockThreads(acc); @@ -59,9 +59,9 @@ struct testPrefixScan { if (!((c[i] == c[i - 1] + 1) && (c[i] == i + 1) && (c[i] == co[i]))) printf("c[%d]=%f, co[%d]=%f\n", i, c[i], i, co[i]); } - ALPAKA_ASSERT_OFFLOAD(c[i] == c[i - 1] + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == i + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == co[i]); + ALPAKA_ASSERT_ACC(c[i] == c[i - 1] + 1); + ALPAKA_ASSERT_ACC(c[i] == i + 1); + ALPAKA_ASSERT_ACC(c[i] == co[i]); } } }; @@ -74,7 +74,7 @@ struct testWarpPrefixScan { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t size) const { if constexpr (!requires_single_thread_per_block_v) { - ALPAKA_ASSERT_OFFLOAD(size <= 32); + ALPAKA_ASSERT_ACC(size <= 32); auto& c = alpaka::declareSharedVar(acc); auto& co = alpaka::declareSharedVar(acc); @@ -90,18 +90,18 @@ struct testWarpPrefixScan { alpaka::syncBlockThreads(acc); - ALPAKA_ASSERT_OFFLOAD(1 == c[0]); - ALPAKA_ASSERT_OFFLOAD(1 == co[0]); + ALPAKA_ASSERT_ACC(1 == c[0]); + ALPAKA_ASSERT_ACC(1 == co[0]); if (i != 0) { if (c[i] != c[i - 1] + 1) printf(format_traits::failed_msg, size, i, blockDimension, c[i], c[i - 1]); - ALPAKA_ASSERT_OFFLOAD(c[i] == c[i - 1] + 1); - ALPAKA_ASSERT_OFFLOAD(c[i] == static_cast(i + 1)); - ALPAKA_ASSERT_OFFLOAD(c[i] == co[i]); + ALPAKA_ASSERT_ACC(c[i] == c[i - 1] + 1); + ALPAKA_ASSERT_ACC(c[i] == static_cast(i + 1)); + ALPAKA_ASSERT_ACC(c[i] == co[i]); } } else { // We should never be called outsie of the GPU. - ALPAKA_ASSERT_OFFLOAD(false); + ALPAKA_ASSERT_ACC(false); } } }; @@ -122,7 +122,7 @@ struct verify { template ALPAKA_FN_ACC void operator()(const TAcc& acc, uint32_t const* v, uint32_t n) const { for (auto index : uniform_elements(acc, n)) { - ALPAKA_ASSERT_OFFLOAD(v[index] == index + 1); + ALPAKA_ASSERT_ACC(v[index] == index + 1); if (index == 0) printf("verify\n");