Skip to content

Commit

Permalink
WIP: waiting for GPU implementation of permuteJustLocal
Browse files: browse the repository at this point in the history
  • Loading branch information
albestro committed Nov 30, 2023
1 parent 6c0ed42 commit d8ea3dd
Showing 1 changed file with 14 additions and 3 deletions.
17 changes: 14 additions & 3 deletions include/dlaf/eigensolver/tridiag_solver/merge.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -1682,10 +1682,21 @@ void mergeDistSubproblems(comm::CommunicatorGrid grid,
auto k = ex::split(stablePartitionIndexForDeflation(dist_evecs, i_begin, i_end, ws_h.c, ws_h.d0,
ws_hm.i2, ws_h.i3, ws_hm.i5));

copy(idx_begin_tiles_vec, sz_tiles_vec, ws_hm.i5, ws.i5);
dlaf::permutations::internal::permuteJustLocal<B, D, T, Coord::Col>(i_begin, i_end, ws.i5, ws.e0,
ws.e1);
// Reorder Eigenvectors
if constexpr (Backend::MC == B) {
copy(idx_begin_tiles_vec, sz_tiles_vec, ws_hm.i5, ws.i5);
dlaf::permutations::internal::permuteJustLocal<B, D, T, Coord::Col>(i_begin, i_end, ws.i5, ws.e0,
ws.e1);
}
else {
// TODO remove this branch. It exists just because GPU permuteJustLocal is not implemented yet
copy(idx_loc_begin, sz_loc_tiles, ws.e0, ws_hm.e0);
dlaf::permutations::internal::permuteJustLocal<Backend::MC, Device::CPU, T, Coord::Col>(
i_begin, i_end, ws_hm.i5, ws_hm.e0, ws_hm.e2);
copy(idx_loc_begin, sz_loc_tiles, ws_hm.e2, ws.e1);
}

// Reorder Eigenvalues
applyIndex(i_begin, i_end, ws_h.i3, ws_h.d0, ws_hm.d1);
applyIndex(i_begin, i_end, ws_h.i3, ws_hm.z0, ws_hm.z1);
copy(idx_begin_tiles_vec, sz_tiles_vec, ws_hm.d1, ws_h.d0);
Expand Down

0 comments on commit d8ea3dd

Please sign in to comment.