Skip to content

Commit

Permalink
Update prefixScan, OneToManyAssoc and HistoContainer for Alpaka 1.1.0
Browse files (browse the repository at this point in the history)
  • Loading branch information
fwyzard committed Feb 12, 2024
1 parent 9bfce46 commit 42794f5
Show file tree
Hide file tree
Showing 8 changed files with 102 additions and 102 deletions.
32 changes: 16 additions & 16 deletions HeterogeneousCore/AlpakaInterface/interface/HistoContainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ namespace cms::alpakatools {
const uint32_t nt = offsets[nh];
for (uint32_t i : uniform_elements(acc, nt)) {
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
ALPAKA_ASSERT_OFFLOAD((*off) > 0);
ALPAKA_ASSERT_ACC((*off) > 0);
int32_t ih = off - offsets - 1;
ALPAKA_ASSERT_OFFLOAD(ih >= 0);
ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
ALPAKA_ASSERT_ACC(ih >= 0);
ALPAKA_ASSERT_ACC(ih < int(nh));
h->count(acc, v[i], ih);
}
}
Expand All @@ -46,10 +46,10 @@ namespace cms::alpakatools {
const uint32_t nt = offsets[nh];
for (uint32_t i : uniform_elements(acc, nt)) {
auto off = alpaka_std::upper_bound(offsets, offsets + nh + 1, i);
ALPAKA_ASSERT_OFFLOAD((*off) > 0);
ALPAKA_ASSERT_ACC((*off) > 0);
int32_t ih = off - offsets - 1;
ALPAKA_ASSERT_OFFLOAD(ih >= 0);
ALPAKA_ASSERT_OFFLOAD(ih < int(nh));
ALPAKA_ASSERT_ACC(ih >= 0);
ALPAKA_ASSERT_ACC(ih < int(nh));
h->fill(acc, v[i], i, ih);
}
}
Expand Down Expand Up @@ -102,7 +102,7 @@ namespace cms::alpakatools {
int bs = Hist::bin(value);
int be = std::min(int(Hist::nbins() - 1), bs + n);
bs = std::max(0, bs - n);
ALPAKA_ASSERT_OFFLOAD(be >= bs);
ALPAKA_ASSERT_ACC(be >= bs);
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
func(*pj);
}
Expand All @@ -113,7 +113,7 @@ namespace cms::alpakatools {
ALPAKA_FN_ACC ALPAKA_FN_INLINE void forEachInWindow(Hist const &hist, V wmin, V wmax, Func const &func) {
auto bs = Hist::bin(wmin);
auto be = Hist::bin(wmax);
ALPAKA_ASSERT_OFFLOAD(be >= bs);
ALPAKA_ASSERT_ACC(be >= bs);
for (auto pj = hist.begin(bs); pj < hist.end(be); ++pj) {
func(*pj);
}
Expand Down Expand Up @@ -164,36 +164,36 @@ namespace cms::alpakatools {
template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
Base::atomicIncrement(acc, this->off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
auto w = Base::atomicDecrement(acc, this->off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
this->content[w - 1] = j;
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, T t, uint32_t nh) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
b += histOff(nh);
ALPAKA_ASSERT_OFFLOAD(b < totbins());
ALPAKA_ASSERT_ACC(b < totbins());
Base::atomicIncrement(acc, this->off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, T t, index_type j, uint32_t nh) {
uint32_t b = bin(t);
ALPAKA_ASSERT_OFFLOAD(b < nbins());
ALPAKA_ASSERT_ACC(b < nbins());
b += histOff(nh);
ALPAKA_ASSERT_OFFLOAD(b < totbins());
ALPAKA_ASSERT_ACC(b < totbins());
auto w = Base::atomicDecrement(acc, this->off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
this->content[w - 1] = j;
}
};
Expand Down
42 changes: 21 additions & 21 deletions HeterogeneousCore/AlpakaInterface/interface/OneToManyAssoc.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ namespace cms::alpakatools {
constexpr auto capacity() const { return content.capacity(); }

ALPAKA_FN_HOST_ACC void initStorage(View view) {
ALPAKA_ASSERT_OFFLOAD(view.assoc == this);
ALPAKA_ASSERT_ACC(view.assoc == this);
if constexpr (ctCapacity() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
ALPAKA_ASSERT_ACC(view.contentStorage);
ALPAKA_ASSERT_ACC(view.contentSize > 0);
content.init(view.contentStorage, view.contentSize);
}
if constexpr (ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
off.init(view.offStorage, view.offSize);
}
}
Expand Down Expand Up @@ -80,24 +80,24 @@ namespace cms::alpakatools {

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void count(const TAcc &acc, I b) {
ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
atomicIncrement(acc, off[b]);
}

template <typename TAcc>
ALPAKA_FN_ACC ALPAKA_FN_INLINE void fill(const TAcc &acc, I b, index_type j) {
ALPAKA_ASSERT_OFFLOAD(b < static_cast<uint32_t>(nOnes()));
ALPAKA_ASSERT_ACC(b < static_cast<uint32_t>(nOnes()));
auto w = atomicDecrement(acc, off[b]);
ALPAKA_ASSERT_OFFLOAD(w > 0);
ALPAKA_ASSERT_ACC(w > 0);
content[w - 1] = j;
}

// this MUST BE DONE in a single block (or in two kernels!)
struct zeroAndInit {
template <typename TAcc>
ALPAKA_FN_ACC void operator()(const TAcc &acc, View view) const {
ALPAKA_ASSERT_OFFLOAD((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_OFFLOAD((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_ACC((1 == alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0]));
ALPAKA_ASSERT_ACC((0 == alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0]));
auto h = view.assoc;
if (cms::alpakatools::once_per_block(acc)) {
h->psws = 0;
Expand All @@ -119,12 +119,12 @@ namespace cms::alpakatools {
template <typename TAcc, typename TQueue>
ALPAKA_FN_INLINE static void launchZero(View view, TQueue &queue) {
if constexpr (ctCapacity() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.contentStorage);
ALPAKA_ASSERT_OFFLOAD(view.contentSize > 0);
ALPAKA_ASSERT_ACC(view.contentStorage);
ALPAKA_ASSERT_ACC(view.contentSize > 0);
}
if constexpr (ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
}
if constexpr (!requires_single_thread_per_block_v<TAcc>) {
auto nthreads = 1024;
Expand All @@ -133,7 +133,7 @@ namespace cms::alpakatools {
alpaka::exec<TAcc>(queue, workDiv, zeroAndInit{}, view);
} else {
auto h = view.assoc;
ALPAKA_ASSERT_OFFLOAD(h);
ALPAKA_ASSERT_ACC(h);
h->initStorage(view);
h->zero();
h->psws = 0;
Expand Down Expand Up @@ -213,9 +213,9 @@ namespace cms::alpakatools {

template <typename TAcc>
ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize(TAcc &acc, Counter *ws = nullptr) {
ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == 0);
ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == 0);
blockPrefixScan(acc, this->off.data(), this->totOnes(), ws);
ALPAKA_ASSERT_OFFLOAD(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
ALPAKA_ASSERT_ACC(this->off[this->totOnes() - 1] == this->off[this->totOnes() - 2]);
}

ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE void finalize() {
Expand All @@ -234,17 +234,17 @@ namespace cms::alpakatools {
ALPAKA_FN_INLINE static void launchFinalize(View view, TQueue &queue) {
// View stores a base pointer, we need to upcast back...
auto h = static_cast<OneToManyAssocRandomAccess *>(view.assoc);
ALPAKA_ASSERT_OFFLOAD(h);
ALPAKA_ASSERT_ACC(h);
if constexpr (!requires_single_thread_per_block_v<TAcc>) {
Counter *poff = (Counter *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, off));
auto nOnes = OneToManyAssocRandomAccess::ctNOnes();
if constexpr (OneToManyAssocRandomAccess::ctNOnes() < 0) {
ALPAKA_ASSERT_OFFLOAD(view.offStorage);
ALPAKA_ASSERT_OFFLOAD(view.offSize > 0);
ALPAKA_ASSERT_ACC(view.offStorage);
ALPAKA_ASSERT_ACC(view.offSize > 0);
nOnes = view.offSize;
poff = view.offStorage;
}
ALPAKA_ASSERT_OFFLOAD(nOnes > 0);
ALPAKA_ASSERT_ACC(nOnes > 0);
int32_t *ppsws = (int32_t *)((char *)(h) + offsetof(OneToManyAssocRandomAccess, psws));
auto nthreads = 1024;
auto nblocks = (nOnes + nthreads - 1) / nthreads;
Expand Down
24 changes: 12 additions & 12 deletions HeterogeneousCore/AlpakaInterface/interface/prefixScan.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,11 @@ namespace cms::alpakatools {
const auto warpSize = alpaka::warp::getSize(acc);
int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
ALPAKA_ASSERT_OFFLOAD(ws);
ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize);
ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize);
ALPAKA_ASSERT_ACC(ws);
ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
auto first = blockThreadIdx;
ALPAKA_ASSERT_OFFLOAD(isPowerOf2(warpSize));
ALPAKA_ASSERT_ACC(isPowerOf2(warpSize));
auto laneId = blockThreadIdx & (warpSize - 1);
auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;

Expand All @@ -64,7 +64,7 @@ namespace cms::alpakatools {
if (i < size) {
// Skipped in warp padding threads.
auto warpId = i / warpSize;
ALPAKA_ASSERT_OFFLOAD(warpId < warpSize);
ALPAKA_ASSERT_ACC(warpId < warpSize);
if ((warpSize - 1) == laneId)
ws[warpId] = co[i];
}
Expand Down Expand Up @@ -97,9 +97,9 @@ namespace cms::alpakatools {
const auto warpSize = alpaka::warp::getSize(acc);
int32_t const blockDimension(alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u]);
int32_t const blockThreadIdx(alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u]);
ALPAKA_ASSERT_OFFLOAD(ws);
ALPAKA_ASSERT_OFFLOAD(size <= warpSize * warpSize);
ALPAKA_ASSERT_OFFLOAD(0 == blockDimension % warpSize);
ALPAKA_ASSERT_ACC(ws);
ALPAKA_ASSERT_ACC(size <= warpSize * warpSize);
ALPAKA_ASSERT_ACC(0 == blockDimension % warpSize);
auto first = blockThreadIdx;
auto laneId = blockThreadIdx & (warpSize - 1);
auto warpUpRoundedSize = (size + warpSize - 1) / warpSize * warpSize;
Expand All @@ -110,7 +110,7 @@ namespace cms::alpakatools {
if (i < size) {
// Skipped in warp padding threads.
auto warpId = i / warpSize;
ALPAKA_ASSERT_OFFLOAD(warpId < warpSize);
ALPAKA_ASSERT_ACC(warpId < warpSize);
if ((warpSize - 1) == laneId)
ws[warpId] = c[i];
}
Expand Down Expand Up @@ -144,14 +144,14 @@ namespace cms::alpakatools {
if constexpr (!requires_single_thread_per_block_v<TAcc>) {
ws = alpaka::getDynSharedMem<T>(acc);
}
ALPAKA_ASSERT_OFFLOAD(warpSize == static_cast<std::size_t>(alpaka::warp::getSize(acc)));
ALPAKA_ASSERT_ACC(warpSize == static_cast<std::size_t>(alpaka::warp::getSize(acc)));
[[maybe_unused]] const auto elementsPerGrid = alpaka::getWorkDiv<alpaka::Grid, alpaka::Elems>(acc)[0u];
const auto elementsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Elems>(acc)[0u];
const auto threadsPerBlock = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[0u];
const auto blocksPerGrid = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[0u];
const auto blockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[0u];
const auto threadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[0u];
ALPAKA_ASSERT_OFFLOAD(elementsPerGrid >= size);
ALPAKA_ASSERT_ACC(elementsPerGrid >= size);
// first each block does a scan
[[maybe_unused]] int off = elementsPerBlock * blockIdx;
if (size - off > 0) {
Expand All @@ -172,7 +172,7 @@ namespace cms::alpakatools {
if (!isLastBlockDone)
return;

ALPAKA_ASSERT_OFFLOAD(int(blocksPerGrid) == *pc);
ALPAKA_ASSERT_ACC(int(blocksPerGrid) == *pc);

// good each block has done its work and now we are left in last block

Expand Down
10 changes: 5 additions & 5 deletions HeterogeneousCore/AlpakaInterface/interface/radixSort.h
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,9 @@ namespace cms::alpakatools {
auto& ibs = alpaka::declareSharedVar<int, __COUNTER__>(acc);
auto& currentSortingPass = alpaka::declareSharedVar<int, __COUNTER__>(acc);

ALPAKA_ASSERT_OFFLOAD(size > 0);
ALPAKA_ASSERT_ACC(size > 0);
// TODO: is this a hard requirement?
ALPAKA_ASSERT_OFFLOAD(blockDimension >= binsNumber);
ALPAKA_ASSERT_ACC(blockDimension >= binsNumber);

currentSortingPass = initialSortingPass;

Expand Down Expand Up @@ -283,7 +283,7 @@ namespace cms::alpakatools {
*/

alpaka::syncBlockThreads(acc);
ALPAKA_ASSERT_OFFLOAD(c[0] == 0);
ALPAKA_ASSERT_ACC(c[0] == 0);

// swap (local, ok)
auto t = j;
Expand All @@ -297,8 +297,8 @@ namespace cms::alpakatools {
}

if ((dataBits != 8) && (0 == (NS & 1)))
ALPAKA_ASSERT_OFFLOAD(j ==
ind); // dataBits/binBits is even so ind is correct (the result is in the right location)
ALPAKA_ASSERT_ACC(j ==
ind); // dataBits/binBits is even so ind is correct (the result is in the right location)

// TODO this copy is (doubly?) redundant with the reorder
if (j != ind) // odd number of sorting passes, we need to move the result to the right array (ind[])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,12 @@ void checkContents(Hist* h,
#ifndef NDEBUG
[[maybe_unused]] auto bk = h->bin(v[k]);
#endif
ALPAKA_ASSERT_OFFLOAD(bk == i);
ALPAKA_ASSERT_OFFLOAD(k < offsets[j + 1]);
ALPAKA_ASSERT_ACC(bk == i);
ALPAKA_ASSERT_ACC(k < offsets[j + 1]);
auto kl = h->bin(v[k] - window);
auto kh = h->bin(v[k] + window);
ALPAKA_ASSERT_OFFLOAD(kl != i);
ALPAKA_ASSERT_OFFLOAD(kh != i);
ALPAKA_ASSERT_ACC(kl != i);
ALPAKA_ASSERT_ACC(kh != i);
// std::cout << kl << ' ' << kh << std::endl;

auto me = v[k];
Expand Down Expand Up @@ -81,7 +81,7 @@ void checkContents(Hist* h,
std::cout << "what? " << j << ' ' << i << ' ' << int(me) << '/' << (int)T(me - window) << '/'
<< (int)T(me + window) << ": " << kl << '/' << kh << ' ' << khh << ' ' << tot << '/' << nm
<< std::endl;
ALPAKA_ASSERT_OFFLOAD(!l);
ALPAKA_ASSERT_ACC(!l);
}
}
int status;
Expand Down Expand Up @@ -133,7 +133,7 @@ int go(const DevHost& host, const Device& device, Queue& queue) {
offsets[0] = 0;
for (uint32_t j = 1; j < nParts + 1; ++j) {
offsets[j] = offsets[j - 1] + partSize - 3 * j;
ALPAKA_ASSERT_OFFLOAD(offsets[j] <= N);
ALPAKA_ASSERT_ACC(offsets[j] <= N);
}

if (it == 1) { // special cases...
Expand Down Expand Up @@ -210,14 +210,14 @@ int go(const DevHost& host, const Device& device, Queue& queue) {
// std::cout << offsets[i] <<" - "<< h->size() << std::endl;
// }

ALPAKA_ASSERT_OFFLOAD(0 == h->off[0]);
ALPAKA_ASSERT_OFFLOAD(offsets[10] == h->size());
ALPAKA_ASSERT_OFFLOAD(0 == hr->off[0]);
ALPAKA_ASSERT_OFFLOAD(offsets[10] == hr->size());
ALPAKA_ASSERT_ACC(0 == h->off[0]);
ALPAKA_ASSERT_ACC(offsets[10] == h->size());
ALPAKA_ASSERT_ACC(0 == hr->off[0]);
ALPAKA_ASSERT_ACC(offsets[10] == hr->size());

auto verify = [&](uint32_t i, uint32_t k, uint32_t t1, uint32_t t2) {
ALPAKA_ASSERT_OFFLOAD(t1 < N);
ALPAKA_ASSERT_OFFLOAD(t2 < N);
ALPAKA_ASSERT_ACC(t1 < N);
ALPAKA_ASSERT_ACC(t2 < N);
if (T(v[t1] - v[t2]) <= 0)
std::cout << "for " << i << ':' << v[k] << " failed " << v[t1] << ' ' << v[t2] << std::endl;
};
Expand Down
Loading

0 comments on commit 42794f5

Please sign in to comment.