-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split implementation to separate header files
Rename classes to CamelCase and move them to the detail namespace:
- uniform_elements_along to detail::UniformElementsAlong
- uniform_groups_along to detail::UniformGroupsAlong
- uniform_group_elements_along to detail::UniformGroupElementsAlong
- uniform_elements_nd to detail::UniformElementsND
- independent_groups_along to detail::IndependentGroupsAlong
- independent_group_elements_along to detail::IndependentGroupElementsAlong

Move the implementation to separate header files. Introduce helper functions with the old names.
- Loading branch information
Showing
7 changed files
with
1,049 additions
and
834 deletions.
There are no files selected for viewing
122 changes: 122 additions & 0 deletions
122
HeterogeneousCore/AlpakaInterface/interface/detail/IndependentGroupElementsAlong.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h | ||
|
||
#include <algorithm> | ||
#include <cstddef> | ||
#include <type_traits> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include "HeterogeneousCore/AlpakaInterface/interface/config.h" | ||
|
||
namespace cms::alpakatools::detail { | ||
|
||
using namespace alpaka_common; | ||
|
||
/* IndependentGroupElementsAlong | ||
* | ||
* `independent_group_elements_along<Dim>(acc, ...)` is a shorthand for | ||
* `IndependentGroupElementsAlong<TAcc, Dim>(acc, ...)` that can infer the accelerator type from the argument. | ||
*/ | ||
|
||
template <typename TAcc, | ||
std::size_t Dim, | ||
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>> | ||
class IndependentGroupElementsAlong { | ||
public: | ||
ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc) | ||
: elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}, | ||
thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}, | ||
stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}, | ||
extent_{stride_} {} | ||
|
||
ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx extent) | ||
: elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}, | ||
thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}, | ||
stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}, | ||
extent_{extent} {} | ||
|
||
ALPAKA_FN_ACC inline IndependentGroupElementsAlong(TAcc const& acc, Idx first, Idx extent) | ||
: elements_{alpaka::getWorkDiv<alpaka::Thread, alpaka::Elems>(acc)[Dim]}, | ||
thread_{alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_ + first}, | ||
stride_{alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc)[Dim] * elements_}, | ||
extent_{extent} {} | ||
|
||
class const_iterator; | ||
using iterator = const_iterator; | ||
|
||
ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(elements_, stride_, extent_, thread_); } | ||
|
||
ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(elements_, stride_, extent_, extent_); } | ||
|
||
class const_iterator { | ||
friend class IndependentGroupElementsAlong; | ||
|
||
ALPAKA_FN_ACC inline const_iterator(Idx elements, Idx stride, Idx extent, Idx first) | ||
: elements_{elements}, | ||
stride_{stride}, | ||
extent_{extent}, | ||
first_{std::min(first, extent)}, | ||
index_{first_}, | ||
range_{std::min(first + elements, extent)} {} | ||
|
||
public: | ||
ALPAKA_FN_ACC inline Idx operator*() const { return index_; } | ||
|
||
// pre-increment the iterator | ||
ALPAKA_FN_ACC inline const_iterator& operator++() { | ||
if constexpr (requires_single_thread_per_block_v<TAcc>) { | ||
// increment the index along the elements processed by the current thread | ||
++index_; | ||
if (index_ < range_) | ||
return *this; | ||
} | ||
|
||
// increment the thread index with the block stride | ||
first_ += stride_; | ||
index_ = first_; | ||
range_ = std::min(first_ + elements_, extent_); | ||
if (index_ < extent_) | ||
return *this; | ||
|
||
// the iterator has reached or passed the end of the extent, clamp it to the extent | ||
first_ = extent_; | ||
index_ = extent_; | ||
range_ = extent_; | ||
return *this; | ||
} | ||
|
||
// post-increment the iterator | ||
ALPAKA_FN_ACC inline const_iterator operator++(int) { | ||
const_iterator old = *this; | ||
++(*this); | ||
return old; | ||
} | ||
|
||
ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { | ||
return (index_ == other.index_) and (first_ == other.first_); | ||
} | ||
|
||
ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); } | ||
|
||
private: | ||
// non-const to support iterator copy and assignment | ||
Idx elements_; | ||
Idx stride_; | ||
Idx extent_; | ||
// modified by the pre/post-increment operator | ||
Idx first_; | ||
Idx index_; | ||
Idx range_; | ||
}; | ||
|
||
private: | ||
const Idx elements_; | ||
const Idx thread_; | ||
const Idx stride_; | ||
const Idx extent_; | ||
}; | ||
|
||
} // namespace cms::alpakatools::detail | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupElementsAlong_h |
124 changes: 124 additions & 0 deletions
124
HeterogeneousCore/AlpakaInterface/interface/detail/IndependentGroupsAlong.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
#ifndef HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h | ||
#define HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h | ||
|
||
#include <algorithm> | ||
#include <cstddef> | ||
#include <type_traits> | ||
|
||
#include <alpaka/alpaka.hpp> | ||
|
||
#include "HeterogeneousCore/AlpakaInterface/interface/config.h" | ||
|
||
namespace cms::alpakatools::detail { | ||
|
||
using namespace alpaka_common; | ||
|
||
/* IndependentGroupsAlong | ||
* | ||
* `IndependentGroupsAlong<TAcc, Dim>(acc, groups)` returns a one-dimensional iteratable range than spans the group | ||
* indices from 0 to `groups`; the groups are assigned to the blocks along the `Dim` dimension. If `groups` is not | ||
* specified, it defaults to the number of blocks along the `Dim` dimension. | ||
* | ||
* `independent_groups_along<Dim>(acc, ...)` is a shorthand for `IndependentGroupsAlong<TAcc, Dim>(acc, ...)` that can | ||
* infer the accelerator type from the argument. | ||
* | ||
* In a 1-dimensional kernel, `independent_groups(acc, ...)` is a shorthand for | ||
* `IndependentGroupsAlong<TAcc, 0>(acc, ...)`. | ||
* | ||
* In an N-dimensional kernel, dimension 0 is the one that increases more slowly (e.g. the outer loop), followed by | ||
* dimension 1, up to dimension N-1 that increases fastest (e.g. the inner loop). | ||
* For convenience when converting CUDA or HIP code, `independent_groups_x(acc, ...)`, `_y` and `_z` are shorthands | ||
* for `IndependentGroupsAlong<TAcc, N-1>(acc, ...)`, `<N-2>` and `<N-3>`. | ||
* | ||
* `independent_groups_along<Dim>(acc, ...)` should be called consistently by all the threads in a block. All threads | ||
* in a block see the same loop iterations, while threads in different blocks may see a different number of iterations. | ||
* If the work division has more blocks than the required number of groups, the first blocks will perform one | ||
* iteration of the loop, while the other blocks will exit the loop immediately. | ||
* If the work division has less blocks than the required number of groups, some of the blocks will perform more than | ||
* one iteration, in order to cover then whole problem space. | ||
* | ||
* For example, | ||
* | ||
* for (auto group: independent_groups_along<Dim>(acc, 7)) | ||
* | ||
* will return the group range from 0 to 6, distributed across all blocks in the work division. | ||
* If the work division has more than 7 blocks, the first 7 will perform one iteration of the loop, while the other | ||
* blocks will exit the loop immediately. For example if the work division has 8 blocks, the blocks from 0 to 6 will | ||
* process one group while block 7 will no process any. | ||
* If the work division has less than 7 blocks, some of the blocks will perform more than one iteration of the loop, | ||
* in order to cover then whole problem space. For example if the work division has 4 blocks, block 0 will process the | ||
* groups 0 and 4, block 1 will process groups 1 and 5, group 2 will process groups 2 and 6, and block 3 will process | ||
* group 3. | ||
*/ | ||
|
||
template <typename TAcc, | ||
std::size_t Dim, | ||
typename = std::enable_if_t<alpaka::isAccelerator<TAcc> and alpaka::Dim<TAcc>::value >= Dim>> | ||
class IndependentGroupsAlong { | ||
public: | ||
ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc) | ||
: first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}, | ||
stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}, | ||
extent_{stride_} {} | ||
|
||
ALPAKA_FN_ACC inline IndependentGroupsAlong(TAcc const& acc, Idx groups) | ||
: first_{alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}, | ||
stride_{alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc)[Dim]}, | ||
extent_{groups} {} | ||
|
||
class const_iterator; | ||
using iterator = const_iterator; | ||
|
||
ALPAKA_FN_ACC inline const_iterator begin() const { return const_iterator(stride_, extent_, first_); } | ||
|
||
ALPAKA_FN_ACC inline const_iterator end() const { return const_iterator(stride_, extent_, extent_); } | ||
|
||
class const_iterator { | ||
friend class IndependentGroupsAlong; | ||
|
||
ALPAKA_FN_ACC inline const_iterator(Idx stride, Idx extent, Idx first) | ||
: stride_{stride}, extent_{extent}, first_{std::min(first, extent)} {} | ||
|
||
public: | ||
ALPAKA_FN_ACC inline Idx operator*() const { return first_; } | ||
|
||
// pre-increment the iterator | ||
ALPAKA_FN_ACC inline const_iterator& operator++() { | ||
// increment the first-element-in-block index by the grid stride | ||
first_ += stride_; | ||
if (first_ < extent_) | ||
return *this; | ||
|
||
// the iterator has reached or passed the end of the extent, clamp it to the extent | ||
first_ = extent_; | ||
return *this; | ||
} | ||
|
||
// post-increment the iterator | ||
ALPAKA_FN_ACC inline const_iterator operator++(int) { | ||
const_iterator old = *this; | ||
++(*this); | ||
return old; | ||
} | ||
|
||
ALPAKA_FN_ACC inline bool operator==(const_iterator const& other) const { return (first_ == other.first_); } | ||
|
||
ALPAKA_FN_ACC inline bool operator!=(const_iterator const& other) const { return not(*this == other); } | ||
|
||
private: | ||
// non-const to support iterator copy and assignment | ||
Idx stride_; | ||
Idx extent_; | ||
// modified by the pre/post-increment operator | ||
Idx first_; | ||
}; | ||
|
||
private: | ||
const Idx first_; | ||
const Idx stride_; | ||
const Idx extent_; | ||
}; | ||
|
||
} // namespace cms::alpakatools::detail | ||
|
||
#endif // HeterogeneousCore_AlpakaInterface_interface_detail_IndependentGroupsAlong_h |
Oops, something went wrong.