Skip to content

Commit

Permalink
Use sender algorithms in gen_to_std
Browse files (browse the repository at this point in the history)
  • Loading branch information
msimberg committed Jun 25, 2021
1 parent bff4744 commit 72009e7
Showing 1 changed file with 32 additions and 20 deletions.
52 changes: 32 additions & 20 deletions include/dlaf/eigensolver/gen_to_std/mc.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,9 +9,8 @@
//
#pragma once

#include <hpx/include/parallel_executors.hpp>
#include <hpx/include/resource_partitioner.hpp>
#include <hpx/include/threads.hpp>
#include <hpx/local/execution.hpp>
#include <hpx/local/thread.hpp>

#include "dlaf/blas/tile.h"
#include "dlaf/common/index2d.h"
Expand Down Expand Up @@ -41,6 +40,9 @@ struct GenToStd<Backend::MC, Device::CPU, T> {
template <class T>
void GenToStd<Backend::MC, Device::CPU, T>::call_L(Matrix<T, Device::CPU>& mat_a,
Matrix<T, Device::CPU>& mat_l) {
using hpx::execution::experimental::detach;
using hpx::threads::thread_priority;

constexpr auto Right = blas::Side::Right;
constexpr auto Left = blas::Side::Left;
constexpr auto Lower = blas::Uplo::Lower;
Expand All @@ -60,18 +62,21 @@ void GenToStd<Backend::MC, Device::CPU, T>::call_L(Matrix<T, Device::CPU>& mat_a
const auto kk = LocalTileIndex{k, k};

// Direct transformation to standard eigenvalue problem of the diagonal tile
hpx::dataflow(executor_hp, unwrapping(tile::internal::hegst_o), 1, Lower, mat_a(kk), mat_l(kk));
dlaf::internal::whenAllLift(1, Lower, mat_a.readwrite_sender(kk), mat_l.readwrite_sender(kk)) |
dlaf::tile::hegst(dlaf::internal::Policy<Backend::MC>(thread_priority::high)) | detach();

if (k != (n - 1)) {
const LocalTileIndex ai_start(k + 1, k);
const LocalTileIndex ai_end(m, k + 1);
const auto ai_panel = dlaf::common::iterate_range2d(ai_start, ai_end);

for (const auto& ik : ai_panel) {
hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::trsm_o), Right, Lower,
ConjTrans, NonUnit, T(1.0), mat_l.read(kk), mat_a(ik));
hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::hemm_o), Right, Lower,
T(-0.5), mat_a.read(kk), mat_l.read(ik), T(1.0), mat_a(ik));
dlaf::internal::whenAllLift(Right, Lower, ConjTrans, NonUnit, T(1.0), mat_l.read_sender(kk),
mat_a.readwrite_sender(ik)) |
dlaf::tile::trsm(dlaf::internal::Policy<Backend::MC>(thread_priority::normal)) | detach();
dlaf::internal::whenAllLift(Right, Lower, T(-0.5), mat_a.read_sender(kk), mat_l.read_sender(ik),
T(1.0), mat_a.readwrite_sender(ik)) |
dlaf::tile::hemm(dlaf::internal::Policy<Backend::MC>(thread_priority::normal)) | detach();
}

const LocalTileIndex ti_start(k + 1, k + 1);
Expand All @@ -82,33 +87,40 @@ void GenToStd<Backend::MC, Device::CPU, T>::call_L(Matrix<T, Device::CPU>& mat_a
const auto ik = LocalTileIndex{ij.row(), k};

if (ij.row() == ij.col()) {
hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::her2k_o), Lower, NoTrans,
T(-1.0), mat_a.read(jk), mat_l.read(jk), BaseType<T>(1.0), mat_a(ij));
dlaf::internal::whenAllLift(Lower, NoTrans, T(-1.0), mat_a.read_sender(jk),
mat_l.read_sender(jk), BaseType<T>(1.0),
mat_a.readwrite_sender(ij)) |
dlaf::tile::her2k(dlaf::internal::Policy<Backend::MC>(thread_priority::high)) | detach();
}
else if (ij.row() > ij.col()) {
hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans,
ConjTrans, T(-1.0), mat_a.read(ik), mat_l.read(jk), T(1.0), mat_a(ij));
hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans,
ConjTrans, T(-1.0), mat_l.read(ik), mat_a.read(jk), T(1.0), mat_a(ij));
dlaf::internal::whenAllLift(NoTrans, ConjTrans, T(-1.0), mat_a.read_sender(ik),
mat_l.read_sender(jk), T(1.0), mat_a.readwrite_sender(ij)) |
dlaf::tile::gemm(dlaf::internal::Policy<Backend::MC>(thread_priority::normal)) | detach();
dlaf::internal::whenAllLift(NoTrans, ConjTrans, T(-1.0), mat_l.read_sender(ik),
mat_a.read_sender(jk), T(1.0), mat_a.readwrite_sender(ij)) |
dlaf::tile::gemm(dlaf::internal::Policy<Backend::MC>(thread_priority::normal)) | detach();
}
}

for (const auto& ik : ai_panel) {
hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::hemm_o), Right, Lower,
T(-0.5), mat_a.read(kk), mat_l.read(ik), T(1.0), mat_a(ik));
dlaf::internal::whenAllLift(Right, Lower, T(-0.5), mat_a.read_sender(kk), mat_l.read_sender(ik),
T(1.0), mat_a.readwrite_sender(ik)) |
dlaf::tile::hemm(dlaf::internal::Policy<Backend::MC>(thread_priority::high)) | detach();
}

for (SizeType j = k + 1; j < n; ++j) {
const auto jj = LocalTileIndex{j, j};
const auto jk = LocalTileIndex{j, k};
hpx::dataflow(executor_hp, matrix::unwrapExtendTiles(tile::internal::trsm_o), Left, Lower,
NoTrans, NonUnit, T(1.0), mat_l.read(jj), mat_a(jk));
dlaf::internal::whenAllLift(Left, Lower, NoTrans, NonUnit, T(1.0), mat_l.read_sender(jj),
mat_a.readwrite_sender(jk)) |
dlaf::tile::trsm(dlaf::internal::Policy<Backend::MC>(thread_priority::high)) | detach();

for (SizeType i = j + 1; i < m; ++i) {
const auto ij = LocalTileIndex{i, j};
const auto ik = LocalTileIndex{i, k};
hpx::dataflow(executor_np, matrix::unwrapExtendTiles(tile::internal::gemm_o), NoTrans, NoTrans,
T(-1.0), mat_l.read(ij), mat_a.read(jk), T(1.0), mat_a(ik));
dlaf::internal::whenAllLift(NoTrans, NoTrans, T(-1.0), mat_l.read_sender(ij),
mat_a.read_sender(jk), T(1.0), mat_a.readwrite_sender(ik)) |
dlaf::tile::gemm(dlaf::internal::Policy<Backend::MC>(thread_priority::normal)) | detach();
}
}
}
Expand Down

0 comments on commit 72009e7

Please sign in to comment.