Skip to content

Commit

Permalink
Rename Impl::alignPtr to Impl::alignPtrTo, allow it to infer argument type (#2261)
Browse files Browse the repository at this point in the history

* KokkosKernels::Impl::alignPtr infers argument type

* Rename KokkosKernels::Impl::alignPtr -> alignPtrTo
  • Loading branch information
cwpearson authored Jun 28, 2024
1 parent bbfc3ff commit 41954e2
Show file tree
Hide file tree
Showing 6 changed files with 20 additions and 27 deletions.
4 changes: 2 additions & 2 deletions common/src/KokkosKernels_Utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1527,8 +1527,8 @@ struct array_sum_reduce {
}
};

template <typename InPtr, typename T>
KOKKOS_INLINE_FUNCTION T *alignPtr(InPtr p) {
template <typename T, typename InPtr>
KOKKOS_INLINE_FUNCTION T *alignPtrTo(InPtr p) {
// ugly but computationally free and the "right" way to do this in C++
std::uintptr_t ptrVal = reinterpret_cast<std::uintptr_t>(p);
// ptrVal + (align - 1) lands inside the next valid aligned scalar_t,
Expand Down
12 changes: 5 additions & 7 deletions sparse/impl/KokkosSparse_bspgemm_impl_kkmem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,8 +270,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *hash_ids = (nnz_lno_t *)(tmp);
tmp += pow2_hash_size;

scalar_t *hash_values =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(tmp);
scalar_t *hash_values = KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp);

BlockAccumulator hm(block_dim, pow2_hash_size, pow2_hash_func, nullptr,
nullptr, hash_ids, hash_values);
Expand Down Expand Up @@ -414,7 +413,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
all_shared_memory += sizeof(nnz_lno_t) * thread_shmem_key_size;
// remainder of shmem allocation for vals
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

BlockAccumulator hm(block_dim, thread_shmem_key_size,
thread_shared_memory_hash_func, begins, nexts, keys,
Expand Down Expand Up @@ -554,7 +553,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();

Expand Down Expand Up @@ -601,8 +600,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
}
global_acc_row_keys = (nnz_lno_t *)(tmp);
global_acc_row_vals =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(
tmp + pow2_hash_size);
KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp + pow2_hash_size);
}
// initialize begins.
{
Expand Down Expand Up @@ -885,7 +883,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();

Expand Down
2 changes: 1 addition & 1 deletion sparse/impl/KokkosSparse_bspgemm_impl_speed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -325,7 +325,7 @@ struct KokkosBSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * shmem_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

KokkosKernels::Experimental::BlockHashmapAccumulator<
nnz_lno_t, nnz_lno_t, scalar_t,
Expand Down
15 changes: 6 additions & 9 deletions sparse/impl/KokkosSparse_spgemm_impl_kkmem.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -261,8 +261,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *hash_ids = (nnz_lno_t *)(tmp);
tmp += pow2_hash_size;

scalar_t *hash_values =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(tmp);
scalar_t *hash_values = KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp);

Kokkos::parallel_for(
Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end),
Expand Down Expand Up @@ -409,8 +408,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,

hm2.keys = (nnz_lno_t *)(tmp);
tmp += max_nnz;
hm2.values =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(tmp);
hm2.values = KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp);

Kokkos::parallel_for(
Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end),
Expand Down Expand Up @@ -498,7 +496,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
all_shared_memory += sizeof(nnz_lno_t) * thread_shmem_key_size;
// remainder of shmem allocation for vals
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

KokkosKernels::Experimental::HashmapAccumulator<
nnz_lno_t, nnz_lno_t, scalar_t,
Expand Down Expand Up @@ -639,7 +637,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();

Expand Down Expand Up @@ -686,8 +684,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
}
global_acc_row_keys = (nnz_lno_t *)(tmp);
global_acc_row_vals =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(
tmp + pow2_hash_size);
KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp + pow2_hash_size);
}
// initialize begins.
{
Expand Down Expand Up @@ -970,7 +967,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();

Expand Down
2 changes: 1 addition & 1 deletion sparse/impl/KokkosSparse_spgemm_impl_speed.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * shmem_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

KokkosKernels::Experimental::HashmapAccumulator<
nnz_lno_t, nnz_lno_t, scalar_t,
Expand Down
12 changes: 5 additions & 7 deletions sparse/impl/KokkosSparse_spgemm_jacobi_sparseacc_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
tmp += max_nnz;
nnz_lno_t *hash_ids = (nnz_lno_t *)(tmp);
tmp += pow2_hash_size;
scalar_t *hash_values =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(tmp);
scalar_t *hash_values = KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp);

Kokkos::parallel_for(
Kokkos::TeamThreadRange(teamMember, team_row_begin, team_row_end),
Expand Down Expand Up @@ -452,7 +451,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,

// Remainder of shmem allocation for vals
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

// Create the hashmaps
KokkosKernels::Experimental::HashmapAccumulator<
Expand Down Expand Up @@ -610,7 +609,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();
int vector_rank = 0;
Expand Down Expand Up @@ -826,7 +825,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
nnz_lno_t *keys = (nnz_lno_t *)(all_shared_memory);
all_shared_memory += sizeof(nnz_lno_t) * team_cuckoo_key_size;
scalar_t *vals =
KokkosKernels::Impl::alignPtr<char *, scalar_t>(all_shared_memory);
KokkosKernels::Impl::alignPtrTo<scalar_t>(all_shared_memory);

int thread_rank = teamMember.team_rank();
int vector_rank = 0;
Expand Down Expand Up @@ -871,8 +870,7 @@ struct KokkosSPGEMM<HandleType, a_row_view_t_, a_lno_nnz_view_t_,
}
global_acc_row_keys = (nnz_lno_t *)(tmp);
global_acc_row_vals =
KokkosKernels::Impl::alignPtr<volatile nnz_lno_t *, scalar_t>(
tmp + pow2_hash_size);
KokkosKernels::Impl::alignPtrTo<scalar_t>(tmp + pow2_hash_size);

nnz_lno_t num_threads = pow2_hash_size / vector_size;
Kokkos::parallel_for(
Expand Down

0 comments on commit 41954e2

Please sign in to comment.