From 59e6b02a53b906b8b861621b342ef1a1b23f17cb Mon Sep 17 00:00:00 2001 From: Peter Munch Date: Thu, 15 Aug 2024 23:03:02 +0200 Subject: [PATCH] Add prototype: matrix-free vector Laplace solver using Kokkos (#1240) uses dealii/dealii#17525 --- prototypes/CMakeLists.txt | 1 + prototypes/kokkos_poisson/CMakeLists.txt | 2 + prototypes/kokkos_poisson/kokkos_poisson.cc | 351 ++++++++++++++++++++ 3 files changed, 354 insertions(+) create mode 100644 prototypes/kokkos_poisson/CMakeLists.txt create mode 100644 prototypes/kokkos_poisson/kokkos_poisson.cc diff --git a/prototypes/CMakeLists.txt b/prototypes/CMakeLists.txt index 560aae1ede..18011a0bda 100644 --- a/prototypes/CMakeLists.txt +++ b/prototypes/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(chorin_navier_stokes) add_subdirectory(direct_gls_navier_stokes) add_subdirectory(direct_steady_navier_stokes) +add_subdirectory(kokkos_poisson) add_subdirectory(matrix_based_non_linear_poisson) add_subdirectory(matrix_free_non_linear_poisson) add_subdirectory(matrix_based_advection_diffusion) diff --git a/prototypes/kokkos_poisson/CMakeLists.txt b/prototypes/kokkos_poisson/CMakeLists.txt new file mode 100644 index 0000000000..d371d9b34f --- /dev/null +++ b/prototypes/kokkos_poisson/CMakeLists.txt @@ -0,0 +1,2 @@ +add_executable(kokkos_poisson kokkos_poisson.cc) +deal_ii_setup_target(kokkos_poisson) diff --git a/prototypes/kokkos_poisson/kokkos_poisson.cc b/prototypes/kokkos_poisson/kokkos_poisson.cc new file mode 100644 index 0000000000..e68240255d --- /dev/null +++ b/prototypes/kokkos_poisson/kokkos_poisson.cc @@ -0,0 +1,351 @@ +/* --------------------------------------------------------------------- + * + * Copyright (C) 2024 by the Lethe authors + * + * This file is part of the Lethe library + * + * The Lethe library is free software; you can use it, redistribute + * it, and/or modify it under the terms of the GNU Lesser General + * Public License as published by the Free Software Foundation; either + * version 2.1 of the 
License, or (at your option) any later version. + * The full text of the license can be found in the file LICENSE at + * the top level of the Lethe distribution. + * + * A simple matrix-free solver for solving a vector Laplace problem + * using Kokkos via Portable::MatrixFree and Portable::FEEvaluation. + * + * ---------------------------------------------------------------------*/ + +#include + +#include +#include +#include + +#include + +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +using namespace dealii; +/* Index of the next solution file: run() builds the output name solution_N.vtu from it and post-increments it each time it writes output. */ +static unsigned int counter = 0; +/* Declaration of the LaplaceOperator class template. Two definitions follow in this file: the next one is built on MatrixFree and FEEvaluation, a later one on CUDAWrappers::MatrixFree. NOTE(review): the template parameter and argument lists are not visible in this copy of the patch, so what selects each definition is not documented here (presumably the memory space) -- confirm against the original file. */ +template +class LaplaceOperator; +/* Matrix-free Laplace operator on top of MatrixFree and FEEvaluation. vmult() runs a cell loop that, for every cell, reads src, evaluates gradients, submits the gradient read at each quadrature point back unchanged, calls integrate(EvaluationFlags::gradients) and distributes the local result into dst. */ +template +class LaplaceOperator +{ +public: + using VectorType = + LinearAlgebra::distributed::Vector; + + LaplaceOperator() = default; +/* Initializes matrix_free from the given mapping, DoF handler, constraints and 1D quadrature. The only mapping update flag requested is update_gradients. */ + void + reinit(const Mapping &mapping, + const DoFHandler &dof_handler, + const AffineConstraints &constraints, + const Quadrature<1> &quadrature) + { + typename MatrixFree::AdditionalData additional_data; + additional_data.mapping_update_flags = update_gradients; + + matrix_free.reinit( + mapping, dof_handler, constraints, quadrature, additional_data); + } +/* Forwards to matrix_free.initialize_dof_vector(vec). */ + void + initialize_dof_vector(VectorType &vec) const + { + matrix_free.initialize_dof_vector(vec); + } +/* Applies the operator to src and stores the result in dst, using matrix_free.cell_loop() with local_apply as the cell worker. NOTE(review): the trailing true is presumably the zero_dst_vector flag of cell_loop (dst cleared before the loop) -- confirm against the deal.II documentation. */ + void + vmult(VectorType &dst, const VectorType &src) const + { + matrix_free.cell_loop(&LaplaceOperator::local_apply, this, dst, src, true); + } + +private: +/* Cell worker for vmult(): processes the cells from cell_range.first up to, but not including, cell_range.second. */ void + local_apply(const MatrixFree &data, + VectorType &dst, + const VectorType &src, + const std::pair &cell_range) const + { + FEEvaluation phi(data); + for (unsigned int cell = cell_range.first; cell < cell_range.second; ++cell) + { + phi.reinit(cell); +/* NOTE(review): read_dof_values_plain() is, per the deal.II documentation, the read that does not resolve constraints, while distribute_local_to_global() below does -- confirm this asymmetry is intended. */ + phi.read_dof_values_plain(src); + phi.evaluate(EvaluationFlags::gradients); + for (unsigned int q = 0; q < phi.n_q_points; ++q) + phi.submit_gradient(phi.get_gradient(q), q); + phi.integrate(EvaluationFlags::gradients); + phi.distribute_local_to_global(dst); + } + } +/* MatrixFree object set up in reinit() and used by initialize_dof_vector() and vmult(). */ + MatrixFree matrix_free; +}; + + + 
+/* Quadrature-point functor used with CUDAWrappers::FEEvaluation: reads the gradient held by *fe_eval at q_point and submits it back unchanged at the same point. */template +class LaplaceOperatorQuad +{ +public: + DEAL_II_HOST_DEVICE void + operator()(CUDAWrappers:: + FEEvaluation + *fe_eval, + const int q_point) const + { + fe_eval->submit_gradient(fe_eval->get_gradient(q_point), q_point); + } +}; +/* Cell functor that the CUDAWrappers-based LaplaceOperator::vmult() passes to matrix_free.cell_loop(). It builds a CUDAWrappers::FEEvaluation from gpu_data and shared_data, reads src, evaluates, applies LaplaceOperatorQuad at every quadrature point, integrates and distributes the result into dst. The cell argument is currently unused (it is only cast to void). NOTE(review): evaluate(false, true) and integrate(false, true) presumably mean values off, gradients on -- confirm against the deal.II documentation. */ +template +class LaplaceOperatorLocal +{ +public: + DEAL_II_HOST_DEVICE void + operator()( + const unsigned int cell, + const typename CUDAWrappers::MatrixFree::Data *gpu_data, + CUDAWrappers::SharedData *shared_data, + const Number *src, + Number *dst) const + { + (void)cell; // TODO? + + CUDAWrappers:: + FEEvaluation + fe_eval( + /*cell,*/ gpu_data, shared_data); + fe_eval.read_dof_values(src); + fe_eval.evaluate(false, true); + fe_eval.apply_for_each_quad_point( + LaplaceOperatorQuad()); + fe_eval.integrate(false, true); + fe_eval.distribute_local_to_global(dst); + } +/* Compile-time sizes: fe_degree + 1 DoFs per direction; Utilities::pow(fe_degree + 1, dim) local DoFs and the same number of quadrature points. NOTE(review): these constants are not read anywhere else in this file; presumably CUDAWrappers::MatrixFree::cell_loop() expects them on the functor -- confirm. */ static const unsigned int n_dofs_1d = fe_degree + 1; + static const unsigned int n_local_dofs = Utilities::pow(fe_degree + 1, dim); + static const unsigned int n_q_points = Utilities::pow(fe_degree + 1, dim); +}; +/* LaplaceOperator definition built on CUDAWrappers::MatrixFree. NOTE(review): the template parameter and argument lists are not visible in this copy of the patch, so what distinguishes it from the MatrixFree-based definition is not documented here (presumably the memory space) -- confirm against the original file. */ +template +class LaplaceOperator +{ +public: + using VectorType = + LinearAlgebra::distributed::Vector; + + LaplaceOperator() = default; +/* Initializes matrix_free from the given mapping, DoF handler, constraints and 1D quadrature. The mapping update flags requested are update_JxW_values | update_gradients. */ + void + reinit(const Mapping &mapping, + const DoFHandler &dof_handler, + const AffineConstraints &constraints, + const Quadrature<1> &quadrature) + { + typename CUDAWrappers::MatrixFree::AdditionalData + additional_data; + additional_data.mapping_update_flags = update_JxW_values | update_gradients; + + matrix_free.reinit( + mapping, dof_handler, constraints, quadrature, additional_data); + } +/* Forwards to matrix_free.initialize_dof_vector(vec). */ + void + initialize_dof_vector(VectorType &vec) const + { + matrix_free.initialize_dof_vector(vec); + } +/* Applies the operator to src and stores the result in dst in three steps: dst is set to zero explicitly, a LaplaceOperatorLocal is run through matrix_free.cell_loop(), and matrix_free.copy_constrained_values(src, dst) is called afterwards. */ + void + vmult(VectorType &dst, const VectorType &src) const + { + dst = 0.0; // TODO: annoying + LaplaceOperatorLocal local_operator; + matrix_free.cell_loop(local_operator, src, dst); + matrix_free.copy_constrained_values(src, dst); // TODO: annoying + } + +private: +/* Matrix-free object set up in reinit() and used by initialize_dof_vector() and vmult(). */ CUDAWrappers::MatrixFree matrix_free; +}; + + + 
+/* Function with n_components components whose value is the sum of sin(p[d]) over all d < dim, multiplied by (1 + component). run() passes it to VectorTools::create_right_hand_side() as the right-hand side. */template +class AnalyticalFunction : public Function +{ +public: + AnalyticalFunction(const unsigned int n_components) + : Function(n_components) + {} +/* Returns (sin(p[0]) + ... + sin(p[dim - 1])) * (1.0 + component); component defaults to 0. */ + virtual T + value(const Point &p, const unsigned int component = 0) const override + { + double temp = 0.0; + + for (unsigned int d = 0; d < dim; ++d) + temp += std::sin(p[d]); + + return temp * (1.0 + component); + } +}; + + +/* Sets up and solves one Laplace problem on a hyper_cube refined n_refinements times, writes the solution to a VTU file and appends one row (fe_degree, n_refinements, n_components, n_dofs, version, norm) to table. NOTE(review): degree and n_components used in the body are presumably template parameters; the template parameter list is not visible in this copy of the patch. */ +template +void +run(const unsigned int n_refinements, ConvergenceTable &table) +{ + const MPI_Comm comm = MPI_COMM_WORLD; + + using Number = double; + using VectorType = LinearAlgebra::distributed::Vector; + + parallel::distributed::Triangulation tria(comm); +/* Mesh: hyper_cube, globally refined n_refinements times. */ + GridGenerator::hyper_cube(tria); + tria.refine_global(n_refinements); +/* Discretization: FE_Q of the given degree wrapped in an FESystem with n_components components, and a QGauss quadrature with degree + 1 points. */ + const MappingQ1 mapping; + const FE_Q fe_q(degree); + const FESystem fe(fe_q, n_components); + const QGauss quadrature(degree + 1); + + DoFHandler dof_handler(tria); + dof_handler.distribute_dofs(fe); +/* Constraints come from DoFTools::make_zero_boundary_constraints(). */ + AffineConstraints constraints; + DoFTools::make_zero_boundary_constraints(dof_handler, constraints); + constraints.close(); +/* The operator is initialized with the first entry of quadrature.get_tensor_basis(), matching the Quadrature<1> parameter of reinit(). */ + LaplaceOperator + laplace_operator; + + laplace_operator.reinit(mapping, + dof_handler, + constraints, + quadrature.get_tensor_basis()[0]); + + VectorType src, dst; + + laplace_operator.initialize_dof_vector(src); + laplace_operator.initialize_dof_vector(dst); +/* Right-hand side: assembled into src_host, then copied into src through a ReadWriteVector over the locally owned range; dst is zeroed and serves as the initial guess passed to the solver. */ + { + LinearAlgebra::distributed::Vector src_host(src.get_partitioner()); + + VectorTools::create_right_hand_side( + mapping, + dof_handler, + quadrature, + AnalyticalFunction(n_components), + src_host, + constraints); + + LinearAlgebra::ReadWriteVector rw_vector( + src.get_partitioner()->locally_owned_range()); + rw_vector.import(src_host, VectorOperation::insert); + src.import(rw_vector, VectorOperation::insert); + + dst = 0.0; + } +/* Solve with SolverCG and PreconditionIdentity; solver_control is a default-constructed ReductionControl, so the stopping criteria are the library defaults (not visible here). */ + PreconditionIdentity preconditioner; + + ReductionControl solver_control; + SolverCG solver(solver_control); + solver.solve(laplace_operator, dst, src, preconditioner); +/* Output: dst is copied into dst_host through a ReadWriteVector and written with DataOut. NOTE(review): rw_vector is sized from the partitioner of src although it transfers dst; both vectors were initialized by the same operator above, so this is presumably equivalent -- confirm. */ + { + LinearAlgebra::distributed::Vector 
dst_host(dst.get_partitioner()); + + LinearAlgebra::ReadWriteVector rw_vector( + src.get_partitioner()->locally_owned_range()); + rw_vector.import(dst, VectorOperation::insert); + dst_host.import(rw_vector, VectorOperation::insert); + + std::string file_name = "solution_" + std::to_string(counter++) + ".vtu"; + + DataOut data_out; + + DataOutBase::VtkFlags flags; + flags.write_higher_order_cells = true; + data_out.set_flags(flags); + + data_out.attach_dof_handler(dof_handler); + data_out.add_data_vector(dst_host, "solution"); + data_out.build_patches(mapping, + degree + 1, + DataOut::CurvedCellRegion::curved_inner_cells); + data_out.write_vtu_in_parallel(file_name, MPI_COMM_WORLD); + } +/* One table row per call. */ + table.add_value("fe_degree", degree); + table.add_value("n_refinements", n_refinements); + table.add_value("n_components", n_components); + table.add_value("n_dofs", dof_handler.n_dofs()); +/* The version column is host or default depending on the std::is_same_v test. NOTE(review): the compared types are not visible in this copy of the patch; presumably the memory space is compared against the host memory space -- confirm. */ + if (std::is_same_v) + table.add_value("version", "host"); + else + table.add_value("version", "default"); + + table.add_value("norm", dst.l2_norm()); +} +/* Entry point: initializes MPI, calls run() six times with n_refinements = 3 and prints the collected table to std::cout. dim = 2 and fe_degree = 3 are declared here. NOTE(review): the six run() calls are textually identical in this copy because their template arguments are not visible; presumably they vary the number of components and the memory space -- confirm against the original file. */ +int +main(int argc, char **argv) +{ + Utilities::MPI::MPI_InitFinalize mpi_initialization(argc, argv, 1); + + const unsigned int dim = 2; + const unsigned int fe_degree = 3; + unsigned int n_refinements = 3; + + ConvergenceTable table; + + run(n_refinements, table); + run(n_refinements, table); + run(n_refinements, table); + run(n_refinements, table); + run(n_refinements, table); + run(n_refinements, table); + + table.write_text(std::cout); +}