From 72009e7d83486627e9aea95ac78e81f7744f1fe2 Mon Sep 17 00:00:00 2001 From: Mikael Simberg Date: Fri, 25 Jun 2021 17:27:14 +0200 Subject: [PATCH] Use sender algorithms in gen_to_std --- include/dlaf/eigensolver/gen_to_std/mc.h | 52 +++++++++++++++--------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/include/dlaf/eigensolver/gen_to_std/mc.h b/include/dlaf/eigensolver/gen_to_std/mc.h index e7e7ae9438..5b364a2209 100644 --- a/include/dlaf/eigensolver/gen_to_std/mc.h +++ b/include/dlaf/eigensolver/gen_to_std/mc.h @@ -9,9 +9,8 @@ // #pragma once -#include -#include -#include +#include +#include #include "dlaf/blas/tile.h" #include "dlaf/common/index2d.h" @@ -41,6 +40,9 @@ struct GenToStd { template void GenToStd::call_L(Matrix& mat_a, Matrix& mat_l) { + using hpx::execution::experimental::detach; + using hpx::threads::thread_priority; + constexpr auto Right = blas::Side::Right; constexpr auto Left = blas::Side::Left; constexpr auto Lower = blas::Uplo::Lower; @@ -60,7 +62,8 @@ void GenToStd::call_L(Matrix& mat_a const auto kk = LocalTileIndex{k, k}; // Direct transformation to standard eigenvalue problem of the diagonal tile - hpx::dataflow(executor_hp, unwrapping(tile::internal::hegst_o), 1, Lower, mat_a(kk), mat_l(kk)); + dlaf::internal::whenAllLift(1, Lower, mat_a.readwrite_sender(kk), mat_l.readwrite_sender(kk)) | + dlaf::tile::hegst(dlaf::internal::Policy(thread_priority::high)) | detach(); if (k != (n - 1)) { const LocalTileIndex ai_start(k + 1, k); @@ -68,10 +71,12 @@ void GenToStd::call_L(Matrix& mat_a const auto ai_panel = dlaf::common::iterate_range2d(ai_start, ai_end); for (const auto& ik : ai_panel) { - hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::trsm_o), Right, Lower, - ConjTrans, NonUnit, T(1.0), mat_l.read(kk), mat_a(ik)); - hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::hemm_o), Right, Lower, - T(-0.5), mat_a.read(kk), mat_l.read(ik), T(1.0), mat_a(ik)); + dlaf::internal::whenAllLift(Right, Lower, ConjTrans, NonUnit, T(1.0), mat_l.read_sender(kk), + mat_a.readwrite_sender(ik)) | + dlaf::tile::trsm(dlaf::internal::Policy(thread_priority::normal)) | detach(); + dlaf::internal::whenAllLift(Right, Lower, T(-0.5), mat_a.read_sender(kk), mat_l.read_sender(ik), + T(1.0), mat_a.readwrite_sender(ik)) | + dlaf::tile::hemm(dlaf::internal::Policy(thread_priority::normal)) | detach(); } const LocalTileIndex ti_start(k + 1, k + 1); @@ -82,33 +87,40 @@ void GenToStd::call_L(Matrix& mat_a const auto ik = LocalTileIndex{ij.row(), k}; if (ij.row() == ij.col()) { - hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::her2k_o), Lower, NoTrans, - T(-1.0), mat_a.read(jk), mat_l.read(jk), BaseType(1.0), mat_a(ij)); + dlaf::internal::whenAllLift(Lower, NoTrans, T(-1.0), mat_a.read_sender(jk), + mat_l.read_sender(jk), BaseType(1.0), + mat_a.readwrite_sender(ij)) | + dlaf::tile::her2k(dlaf::internal::Policy(thread_priority::high)) | detach(); } else if (ij.row() > ij.col()) { - hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans, - ConjTrans, T(-1.0), mat_a.read(ik), mat_l.read(jk), T(1.0), mat_a(ij)); - hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans, - ConjTrans, T(-1.0), mat_l.read(ik), mat_a.read(jk), T(1.0), mat_a(ij)); + dlaf::internal::whenAllLift(NoTrans, ConjTrans, T(-1.0), mat_a.read_sender(ik), + mat_l.read_sender(jk), T(1.0), mat_a.readwrite_sender(ij)) | + dlaf::tile::gemm(dlaf::internal::Policy(thread_priority::normal)) | detach(); + dlaf::internal::whenAllLift(NoTrans, ConjTrans, T(-1.0), mat_l.read_sender(ik), + mat_a.read_sender(jk), T(1.0), mat_a.readwrite_sender(ij)) | + dlaf::tile::gemm(dlaf::internal::Policy(thread_priority::normal)) | detach(); } } for (const auto& ik : ai_panel) { - hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::hemm_o), Right, Lower, - T(-0.5), mat_a.read(kk), mat_l.read(ik), T(1.0), mat_a(ik)); + dlaf::internal::whenAllLift(Right, Lower, T(-0.5), mat_a.read_sender(kk), mat_l.read_sender(ik), + T(1.0), mat_a.readwrite_sender(ik)) | + dlaf::tile::hemm(dlaf::internal::Policy(thread_priority::high)) | detach(); } for (SizeType j = k + 1; j < n; ++j) { const auto jj = LocalTileIndex{j, j}; const auto jk = LocalTileIndex{j, k}; - hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::trsm_o), Left, Lower, - NoTrans, NonUnit, T(1.0), mat_l.read(jj), mat_a(jk)); + dlaf::internal::whenAllLift(Left, Lower, NoTrans, NonUnit, T(1.0), mat_l.read_sender(jj), + mat_a.readwrite_sender(jk)) | + dlaf::tile::trsm(dlaf::internal::Policy(thread_priority::high)) | detach(); for (SizeType i = j + 1; i < m; ++i) { const auto ij = LocalTileIndex{i, j}; const auto ik = LocalTileIndex{i, k}; - hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans, NoTrans, - T(-1.0), mat_l.read(ij), mat_a.read(jk), T(1.0), mat_a(ik)); + dlaf::internal::whenAllLift(NoTrans, NoTrans, T(-1.0), mat_l.read_sender(ij), + mat_a.read_sender(jk), T(1.0), mat_a.readwrite_sender(ik)) | + dlaf::tile::gemm(dlaf::internal::Policy(thread_priority::normal)) | detach(); } } }