Merge pull request kokkos#1171 from lucbv/Fix_using_namespace
using namespace should be scoped to prevent name clashes; see issue kokkos#1170
lucbv authored Nov 4, 2021
2 parents c451856 + 52b0907 commit e9496bb
Showing 6 changed files with 156 additions and 101 deletions.
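The commit message above describes the underlying problem: when a broad using-directive (e.g. "using namespace KokkosBatched;") is visible in these perf-test headers alongside KokkosBlas names, unqualified identifiers such as Trans, Algo, or Mode can become ambiguous. The diffs below fix this by fully qualifying the names (KokkosBatched::Trans::NoTranspose, KokkosBatched::Algo::Gemm::Unblocked, ...) or by binding them to narrowly scoped aliases. The following is a minimal standalone sketch of the clash and the fix pattern; the namespaces and types in it are invented stand-ins, not the real KokkosBlas/KokkosBatched declarations.

// Minimal standalone sketch of the name-clash problem and the fix pattern.
// BlasLike and BatchedLike are invented stand-ins, not the real libraries.
#include <iostream>

namespace BlasLike {
struct Trans {
  struct NoTranspose {};
};
}  // namespace BlasLike

namespace BatchedLike {
struct Trans {
  struct NoTranspose {};
};
}  // namespace BatchedLike

// Problematic: file-scope using-directives make every unqualified use of
// "Trans" ambiguous for any code that sees both namespaces.
// using namespace BlasLike;
// using namespace BatchedLike;

int main() {
  // Fix used throughout the gemm perf test: qualify the name explicitly,
  // or bind it to a narrowly scoped alias right where it is needed.
  using N = BatchedLike::Trans::NoTranspose;
  N no_transpose;
  (void)no_transpose;
  std::cout << "no ambiguity: BatchedLike::Trans::NoTranspose selected\n";
  return 0;
}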
154 changes: 90 additions & 64 deletions perf_test/blas/blas3/KokkosBlas3_gemm_perf_test.hpp
@@ -416,7 +416,7 @@ void __do_gemm_serial_batched_template(options_t options,
C = Kokkos::subview(_gemm_args.C, Kokkos::ALL(), Kokkos::ALL(), j);
}

SerialGemm<TransAType, TransBType, AlgoType>::invoke(
KokkosBatched::SerialGemm<TransAType, TransBType, AlgoType>::invoke(
_gemm_args.alpha, A, B, _gemm_args.beta, C);
}
}
@@ -445,9 +445,9 @@ template <class scalar_type, class vta, class vtb, class vtc, class device_type,
void __do_gemm_serial_batched(options_t options, gemm_args_t gemm_args) {
char a = toupper(gemm_args.transA);
char b = toupper(gemm_args.transB);
using N = Trans::NoTranspose;
using T = Trans::Transpose;
// using C = Trans::ConjTranspose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = KokkosBatched::Trans::ConjTranspose;

STATUS;

@@ -479,12 +479,14 @@ template <class algo_tag, class blocking_type, class device_type,
class algo_mode = void>
void __do_gemm_parallel_batched_heuristic_template(options_t options,
gemm_args_t gemm_args) {
BatchedGemmHandle batchedGemmHandle(BaseHeuristicAlgos::SQUARE);
KokkosBatched::BatchedGemmHandle batchedGemmHandle(
KokkosBatched::BaseHeuristicAlgos::SQUARE);
char a = toupper(gemm_args.transA);
char b = toupper(gemm_args.transB);
using N = Trans::NoTranspose;
using T = Trans::Transpose;
// using C = Trans::ConjTranspose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = KokkosBatched::Trans::ConjTranspose;
using KokkosBatched::BatchLayout;

STATUS;
if (a == 'N' && b == 'N') {
@@ -950,9 +952,9 @@ template <class algo_tag, class blocking_type, class device_type,
void __do_gemm_parallel_batched(options_t options, gemm_args_t gemm_args) {
char a = gemm_args.transA;
char b = gemm_args.transB;
using N = Trans::NoTranspose;
using T = Trans::Transpose;
// using C = Trans::ConjTranspose;
using N = KokkosBatched::Trans::NoTranspose;
using T = KokkosBatched::Trans::Transpose;
// using C = KokkosBatched::Trans::ConjTranspose;

STATUS;

@@ -1515,8 +1517,12 @@ void __do_gemm_armpl(options_t options, gemm_args_t gemm_args) {
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
char transa = std::is_same<TransAType, Trans::NoTranspose>::value ? 'N' : 'T';
char transb = std::is_same<TransBType, Trans::NoTranspose>::value ? 'N' : 'T';
char transa =
std::is_same<TransAType, KokkosBatched::Trans::NoTranspose>::value ? 'N'
: 'T';
char transb =
std::is_same<TransBType, KokkosBatched::Trans::NoTranspose>::value ? 'N'
: 'T';

if (!std::is_same<default_scalar, double>::value)
FATAL_ERROR("only double scalars are supported!");
@@ -2229,18 +2235,20 @@ void do_gemm_serial_blas(options_t options) {
void do_gemm_serial_batched(options_t options) {
STATUS;
__do_loop_and_invoke(
options, __do_gemm_serial_batched<default_scalar, view_type_3d,
view_type_3d, view_type_3d,
default_device, Algo::Gemm::Unblocked>);
options,
__do_gemm_serial_batched<default_scalar, view_type_3d, view_type_3d,
view_type_3d, default_device,
KokkosBatched::Algo::Gemm::Unblocked>);
return;
}

void do_gemm_serial_batched_blocked(options_t options) {
STATUS;
__do_loop_and_invoke(
options, __do_gemm_serial_batched<default_scalar, view_type_3d,
view_type_3d, view_type_3d,
default_device, Algo::Gemm::Blocked>);
options,
__do_gemm_serial_batched<default_scalar, view_type_3d, view_type_3d,
view_type_3d, default_device,
KokkosBatched::Algo::Gemm::Blocked>);
return;
}

@@ -2263,26 +2271,29 @@ void do_gemm_serial_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialBatchDim3Tag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<SerialBatchDim3Tag,
KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<SerialTag, Algo::Gemm::Unblocked,
default_device>);
options,
__do_gemm_parallel_batched<
SerialTag, KokkosBatched::Algo::Gemm::Unblocked, default_device>);
return;
}

void do_gemm_serial_batched_blocked_parallel(options_t options) {
STATUS;
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialBatchDim3Tag, Algo::Gemm::Blocked,
default_device>);
options, __do_gemm_parallel_batched<SerialBatchDim3Tag,
KokkosBatched::Algo::Gemm::Blocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<SerialTag, Algo::Gemm::Blocked,
default_device>);
options,
__do_gemm_parallel_batched<
SerialTag, KokkosBatched::Algo::Gemm::Blocked, default_device>);
return;
}

@@ -2293,13 +2304,14 @@ void do_gemm_serial_simd_batched_parallel(options_t options) {
options.use_simd = true;
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Unblocked,
default_device, Mode::Serial>);
options, __do_gemm_parallel_batched<
TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Serial>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Unblocked,
default_device, Mode::Serial>);
__do_loop_and_invoke(options,
__do_gemm_parallel_batched<
TeamSimdTag, KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Serial>);
return;
}

@@ -2310,13 +2322,14 @@ void do_gemm_serial_simd_batched_blocked_parallel(options_t options) {
options.use_simd = true;
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Blocked,
default_device, Mode::Serial>);
options, __do_gemm_parallel_batched<
TeamSimdBatchDim4Tag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Serial>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Blocked,
default_device, Mode::Serial>);
__do_loop_and_invoke(options,
__do_gemm_parallel_batched<
TeamSimdTag, KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Serial>);
return;
}

@@ -2329,11 +2342,13 @@ void do_gemm_serial_batched_compact_mkl_parallel(options_t options) {
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialSimdBatchDim3Tag,
Algo::Gemm::CompactMKL, default_device>);
KokkosBatched::Algo::Gemm::CompactMKL,
default_device>);
else
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<SerialSimdTag, Algo::Gemm::CompactMKL,
__do_gemm_parallel_batched<SerialSimdTag,
KokkosBatched::Algo::Gemm::CompactMKL,
default_device>);
return;
}
@@ -2367,26 +2382,29 @@ void do_gemm_team_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamBatchDim3Tag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<TeamBatchDim3Tag,
KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamTag, Algo::Gemm::Unblocked,
default_device>);
options,
__do_gemm_parallel_batched<
TeamTag, KokkosBatched::Algo::Gemm::Unblocked, default_device>);
return;
}

void do_gemm_team_batched_blocked_parallel(options_t options) {
STATUS;
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamBatchDim3Tag, Algo::Gemm::Blocked,
default_device>);
options, __do_gemm_parallel_batched<TeamBatchDim3Tag,
KokkosBatched::Algo::Gemm::Blocked,
default_device>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamTag, Algo::Gemm::Blocked,
default_device>);
options,
__do_gemm_parallel_batched<TeamTag, KokkosBatched::Algo::Gemm::Blocked,
default_device>);
return;
}

@@ -2396,11 +2414,13 @@ void do_gemm_team_vector_batched_parallel(options_t options) {
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamVectorBatchDim3Tag,
Algo::Gemm::Unblocked, default_device>);
KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
else
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamVectorTag, Algo::Gemm::Unblocked,
__do_gemm_parallel_batched<TeamVectorTag,
KokkosBatched::Algo::Gemm::Unblocked,
default_device>);
return;
}
@@ -2411,12 +2431,15 @@ void do_gemm_team_simd_batched_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Unblocked,
default_device, Mode::Team>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag,
KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Team>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Unblocked,
default_device, Mode::Team>);
options,
__do_gemm_parallel_batched<TeamSimdTag,
KokkosBatched::Algo::Gemm::Unblocked,
default_device, KokkosBatched::Mode::Team>);
return;
}

@@ -2426,28 +2449,31 @@ void do_gemm_team_simd_batched_blocked_parallel(options_t options) {
if (options.blas_args.batch_size_last_dim)
__do_loop_and_invoke(
options,
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag, Algo::Gemm::Blocked,
default_device, Mode::Team>);
__do_gemm_parallel_batched<TeamSimdBatchDim4Tag,
KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Team>);
else
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamSimdTag, Algo::Gemm::Blocked,
default_device, Mode::Team>);
options,
__do_gemm_parallel_batched<TeamSimdTag,
KokkosBatched::Algo::Gemm::Blocked,
default_device, KokkosBatched::Mode::Team>);
return;
}

// Blocked algo not yet implemented for TeamVectorGemm.
/* void do_gemm_team_vector_batched_blocked_parallel(options_t options) {
STATUS;
__do_loop_and_invoke(
options, __do_gemm_parallel_batched<TeamVectorTag, Algo::Gemm::Blocked,
default_device>); return;
options, __do_gemm_parallel_batched<TeamVectorTag,
KokkosBatched::Algo::Gemm::Blocked, default_device>); return;
} */

void do_gemm_experiment_parallel(options_t options) {
STATUS;
using TransAType = Trans::NoTranspose;
using TransBType = Trans::NoTranspose;
using BlockingType = Algo::Gemm::Unblocked;
using TransAType = KokkosBatched::Trans::NoTranspose;
using TransBType = KokkosBatched::Trans::NoTranspose;
using BlockingType = KokkosBatched::Algo::Gemm::Unblocked;

// __do_loop_and_invoke(
// options, __do_gemm_parallel_experiment1<TransAType, TransBType,
24 changes: 18 additions & 6 deletions perf_test/blas/blas3/KokkosBlas3_trmm_perf_test.hpp
@@ -272,14 +272,15 @@ void __do_trmm_serial_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trmm::Unblocked;
using tag = KokkosBatched::Algo::Trmm::Unblocked;

for (uint32_t j = 0; j < warm_up_n; ++j) {
for (int i = 0; i < options.start.a.k; ++i) {
auto A = Kokkos::subview(trmm_args.A, i, Kokkos::ALL(), Kokkos::ALL());
auto B = Kokkos::subview(trmm_args.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(
trmm_args.alpha, A, B);
}
// Fence after submitting each batch operation
Kokkos::fence();
@@ -291,7 +292,8 @@ void __do_trmm_serial_batched_template(options_t options,
auto A = Kokkos::subview(trmm_args.A, i, Kokkos::ALL(), Kokkos::ALL());
auto B = Kokkos::subview(trmm_args.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args.alpha, A, B);
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(
trmm_args.alpha, A, B);
}
// Fence after submitting each batch operation
Kokkos::fence();
@@ -315,6 +317,11 @@ void __do_trmm_serial_batched(options_t options, trmm_args_t trmm_args) {
__trans = tolower(trmm_args.trans);
//__diag = tolower(diag[0]);

using KokkosBatched::Diag;
using KokkosBatched::Side;
using KokkosBatched::Trans;
using KokkosBatched::Uplo;

STATUS;

//// Lower non-transpose ////
@@ -480,8 +487,8 @@ struct parallel_batched_trmm {
auto svA = Kokkos::subview(trmm_args_.A, i, Kokkos::ALL(), Kokkos::ALL());
auto svB = Kokkos::subview(trmm_args_.B, i, Kokkos::ALL(), Kokkos::ALL());

SerialTrmm<side, uplo, trans, diag, tag>::invoke(trmm_args_.alpha, svA,
svB);
KokkosBatched::SerialTrmm<side, uplo, trans, diag, tag>::invoke(
trmm_args_.alpha, svA, svB);
}
};

@@ -491,7 +498,7 @@ void __do_trmm_parallel_batched_template(options_t options,
uint32_t warm_up_n = options.warm_up_n;
uint32_t n = options.n;
Kokkos::Timer timer;
using tag = Algo::Trmm::Unblocked;
using tag = KokkosBatched::Algo::Trmm::Unblocked;
using execution_space = typename device_type::execution_space;
using functor_type =
parallel_batched_trmm<side, uplo, trans, diag, tag, execution_space>;
@@ -528,6 +535,11 @@ void __do_trmm_parallel_batched(options_t options, trmm_args_t trmm_args) {
__trans = tolower(trmm_args.trans);
//__diag = tolower(diag[0]);

using KokkosBatched::Diag;
using KokkosBatched::Side;
using KokkosBatched::Trans;
using KokkosBatched::Uplo;

STATUS;

//// Lower non-transpose ////
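The trmm perf test above also uses a second remedy alongside full qualification: function-scoped using-declarations (using KokkosBatched::Diag; using KokkosBatched::Side; ...) added inside the dispatch functions, so the short names are available only where they are needed. Below is a minimal standalone sketch of that pattern, again with invented stand-in names rather than the real KokkosBatched declarations.

// Standalone sketch of function-scoped using-declarations.
// BatchedLike is an invented stand-in, not the real KokkosBatched namespace.
#include <iostream>

namespace BatchedLike {
struct Side {
  struct Left {};
};
struct Uplo {
  struct Lower {};
};
}  // namespace BatchedLike

void dispatch_trmm_like() {
  // Function-scoped using-declarations: only Side and Uplo, and only inside
  // this function, so nothing leaks into other headers or translation units.
  using BatchedLike::Side;
  using BatchedLike::Uplo;

  Side::Left left;    // short names are valid here...
  Uplo::Lower lower;
  (void)left;
  (void)lower;
}

int main() {
  dispatch_trmm_like();
  // ...but outside the function the fully qualified name is still required.
  BatchedLike::Uplo::Lower lower;
  (void)lower;
  std::cout << "scoped using-declarations: ok\n";
  return 0;
}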
(Diffs for the remaining four changed files are not shown here.)